1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
61 /* Forward definitions of types. */
/* Minipool (constant pool placed in the instruction stream) node and
   fixup records; the struct bodies are defined later in the file.  */
62 typedef struct minipool_node Mnode
;
63 typedef struct minipool_fixup Mfix
;
/* Hook for emitting language-specific object attributes.  NOTE(review):
   assigned outside this chunk (presumably by the language front end) --
   confirm against the full file.  */
65 void (*arm_lang_output_object_attributes_hook
)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets
*arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
73 HOST_WIDE_INT
, rtx
, rtx
, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx
, int);
76 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
77 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
78 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
79 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
80 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
81 inline static int thumb1_index_register_rtx_p (rtx
, int);
82 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT
, enum rtx_code
);
86 static rtx
emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx
, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx
, int);
90 static void arm_print_operand_address (FILE *, rtx
);
91 static bool arm_print_operand_punct_valid_p (unsigned char code
);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
93 static arm_cc
get_arm_condition_code (rtx
);
94 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
95 static rtx
is_jump_table (rtx
);
96 static const char *output_multi_immediate (rtx
*, const char *, const char *,
98 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
99 static struct machine_function
*arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx
is_jump_table (rtx
);
102 static HOST_WIDE_INT
get_jump_table_size (rtx
);
103 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
104 static Mnode
*add_minipool_forward_ref (Mfix
*);
105 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
106 static Mnode
*add_minipool_backward_ref (Mfix
*);
107 static void assign_minipool_offsets (Mfix
*);
108 static void arm_print_value (FILE *, rtx
);
109 static void dump_minipool (rtx
);
110 static int arm_barrier_cost (rtx
);
111 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
112 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
113 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree
);
120 static unsigned long arm_compute_func_type (void);
121 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
122 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
123 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
129 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT
);
130 static int arm_comp_type_attributes (const_tree
, const_tree
);
131 static void arm_set_default_type_attributes (tree
);
132 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
133 static int count_insns_for_constant (HOST_WIDE_INT
, int);
134 static int arm_get_strip_length (int);
135 static bool arm_function_ok_for_sibcall (tree
, tree
);
136 static enum machine_mode
arm_promote_function_mode (const_tree
,
137 enum machine_mode
, int *,
139 static bool arm_return_in_memory (const_tree
, const_tree
);
140 static rtx
arm_function_value (const_tree
, const_tree
, bool);
141 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
143 static void arm_internal_label (FILE *, const char *, unsigned long);
144 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
146 static bool arm_have_conditional_execution (void);
147 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
148 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
149 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
150 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
151 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
152 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
153 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
154 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
155 static bool arm_rtx_costs (rtx
, int, int, int *, bool);
156 static int arm_address_cost (rtx
, bool);
157 static bool arm_memory_load_p (rtx
);
158 static bool arm_cirrus_insn_p (rtx
);
159 static void cirrus_reorg (rtx
);
160 static void arm_init_builtins (void);
161 static void arm_init_iwmmxt_builtins (void);
162 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
163 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
164 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
165 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
166 static tree
arm_builtin_decl (unsigned, bool);
167 static void emit_constant_insn (rtx cond
, rtx pattern
);
168 static rtx
emit_set_insn (rtx
, rtx
);
169 static int arm_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
171 static rtx
arm_function_arg (CUMULATIVE_ARGS
*, enum machine_mode
,
173 static void arm_function_arg_advance (CUMULATIVE_ARGS
*, enum machine_mode
,
175 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
176 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
178 static int aapcs_select_return_coproc (const_tree
, const_tree
);
180 #ifdef OBJECT_FORMAT_ELF
181 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
182 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
185 static void arm_encode_section_info (tree
, rtx
, int);
188 static void arm_file_end (void);
189 static void arm_file_start (void);
191 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
193 static bool arm_pass_by_reference (CUMULATIVE_ARGS
*,
194 enum machine_mode
, const_tree
, bool);
195 static bool arm_promote_prototypes (const_tree
);
196 static bool arm_default_short_enums (void);
197 static bool arm_align_anon_bitfield (void);
198 static bool arm_return_in_msb (const_tree
);
199 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
200 static bool arm_return_in_memory (const_tree
, const_tree
);
202 static void arm_unwind_emit (FILE *, rtx
);
203 static bool arm_output_ttype (rtx
);
204 static void arm_asm_emit_except_personality (rtx
);
205 static void arm_asm_init_sections (void);
207 static enum unwind_info_type
arm_except_unwind_info (struct gcc_options
*);
208 static void arm_dwarf_handle_frame_unspec (const char *, rtx
, int);
209 static rtx
arm_dwarf_register_span (rtx
);
211 static tree
arm_cxx_guard_type (void);
212 static bool arm_cxx_guard_mask_bit (void);
213 static tree
arm_get_cookie_size (tree
);
214 static bool arm_cookie_has_size (void);
215 static bool arm_cxx_cdtor_returns_this (void);
216 static bool arm_cxx_key_method_may_be_inline (void);
217 static void arm_cxx_determine_class_data_visibility (tree
);
218 static bool arm_cxx_class_data_always_comdat (void);
219 static bool arm_cxx_use_aeabi_atexit (void);
220 static void arm_init_libfuncs (void);
221 static tree
arm_build_builtin_va_list (void);
222 static void arm_expand_builtin_va_start (tree
, rtx
);
223 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
224 static void arm_option_override (void);
225 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
226 static bool arm_cannot_copy_insn_p (rtx
);
227 static bool arm_tls_symbol_p (rtx x
);
228 static int arm_issue_rate (void);
229 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
230 static bool arm_output_addr_const_extra (FILE *, rtx
);
231 static bool arm_allocate_stack_slots_for_args (void);
232 static const char *arm_invalid_parameter_type (const_tree t
);
233 static const char *arm_invalid_return_type (const_tree t
);
234 static tree
arm_promoted_type (const_tree t
);
235 static tree
arm_convert_to_type (tree type
, tree expr
);
236 static bool arm_scalar_mode_supported_p (enum machine_mode
);
237 static bool arm_frame_pointer_required (void);
238 static bool arm_can_eliminate (const int, const int);
239 static void arm_asm_trampoline_template (FILE *);
240 static void arm_trampoline_init (rtx
, tree
, rtx
);
241 static rtx
arm_trampoline_adjust_address (rtx
);
242 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
243 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
244 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
245 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
246 static bool arm_array_mode_supported_p (enum machine_mode
,
247 unsigned HOST_WIDE_INT
);
248 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
249 static bool arm_class_likely_spilled_p (reg_class_t
);
250 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
251 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
255 static void arm_conditional_register_usage (void);
256 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
257 static unsigned int arm_autovectorize_vector_sizes (void);
258 static int arm_default_branch_cost (bool, bool);
261 /* Table of machine attributes. */
262 static const struct attribute_spec arm_attribute_table
[] =
264 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
265 affects_type_identity } */
266 /* Function calls made to this symbol must be done indirectly, because
267 it may lie outside of the 26 bit addressing range of a normal function
269 { "long_call", 0, 0, false, true, true, NULL
, false },
270 /* Whereas these functions are always known to reside within the 26 bit
272 { "short_call", 0, 0, false, true, true, NULL
, false },
273 /* Specify the procedure call conventions for a function. */
274 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
276 /* Interrupt Service Routines have special prologue and epilogue requirements. */
277 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
279 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
281 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
284 /* ARM/PE has three new attributes:
286 dllexport - for exporting a function/variable that will live in a dll
287 dllimport - for importing a function/variable from a dll
289 Microsoft allows multiple declspecs in one __declspec, separating
290 them with spaces. We do NOT support this. Instead, use __declspec
293 { "dllimport", 0, 0, true, false, false, NULL
, false },
294 { "dllexport", 0, 0, true, false, false, NULL
, false },
295 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
297 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
298 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
299 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
300 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
303 { NULL
, 0, 0, false, false, false, NULL
, false }
306 /* Set default optimization options. */
307 static const struct default_options arm_option_optimization_table
[] =
309 /* Enable section anchors by default at -O1 or higher. */
310 { OPT_LEVELS_1_PLUS
, OPT_fsection_anchors
, NULL
, 1 },
311 { OPT_LEVELS_1_PLUS
, OPT_fomit_frame_pointer
, NULL
, 1 },
312 { OPT_LEVELS_NONE
, 0, NULL
, 0 }
315 /* Initialize the GCC target structure. */
316 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
317 #undef TARGET_MERGE_DECL_ATTRIBUTES
318 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
321 #undef TARGET_LEGITIMIZE_ADDRESS
322 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
324 #undef TARGET_ATTRIBUTE_TABLE
325 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START arm_file_start
329 #undef TARGET_ASM_FILE_END
330 #define TARGET_ASM_FILE_END arm_file_end
332 #undef TARGET_ASM_ALIGNED_SI_OP
333 #define TARGET_ASM_ALIGNED_SI_OP NULL
334 #undef TARGET_ASM_INTEGER
335 #define TARGET_ASM_INTEGER arm_assemble_integer
337 #undef TARGET_PRINT_OPERAND
338 #define TARGET_PRINT_OPERAND arm_print_operand
339 #undef TARGET_PRINT_OPERAND_ADDRESS
340 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
341 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
342 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
344 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
345 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
347 #undef TARGET_ASM_FUNCTION_PROLOGUE
348 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
350 #undef TARGET_ASM_FUNCTION_EPILOGUE
351 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
353 #undef TARGET_DEFAULT_TARGET_FLAGS
354 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
355 #undef TARGET_OPTION_OVERRIDE
356 #define TARGET_OPTION_OVERRIDE arm_option_override
357 #undef TARGET_OPTION_OPTIMIZATION_TABLE
358 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
360 #undef TARGET_COMP_TYPE_ATTRIBUTES
361 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
363 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
364 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
366 #undef TARGET_SCHED_ADJUST_COST
367 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
369 #undef TARGET_ENCODE_SECTION_INFO
371 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
373 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
376 #undef TARGET_STRIP_NAME_ENCODING
377 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
379 #undef TARGET_ASM_INTERNAL_LABEL
380 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
382 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
383 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
385 #undef TARGET_FUNCTION_VALUE
386 #define TARGET_FUNCTION_VALUE arm_function_value
388 #undef TARGET_LIBCALL_VALUE
389 #define TARGET_LIBCALL_VALUE arm_libcall_value
391 #undef TARGET_ASM_OUTPUT_MI_THUNK
392 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
393 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
394 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
396 #undef TARGET_RTX_COSTS
397 #define TARGET_RTX_COSTS arm_rtx_costs
398 #undef TARGET_ADDRESS_COST
399 #define TARGET_ADDRESS_COST arm_address_cost
401 #undef TARGET_SHIFT_TRUNCATION_MASK
402 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
403 #undef TARGET_VECTOR_MODE_SUPPORTED_P
404 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
405 #undef TARGET_ARRAY_MODE_SUPPORTED_P
406 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
407 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
408 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
409 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
410 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
411 arm_autovectorize_vector_sizes
413 #undef TARGET_MACHINE_DEPENDENT_REORG
414 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
416 #undef TARGET_INIT_BUILTINS
417 #define TARGET_INIT_BUILTINS arm_init_builtins
418 #undef TARGET_EXPAND_BUILTIN
419 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
420 #undef TARGET_BUILTIN_DECL
421 #define TARGET_BUILTIN_DECL arm_builtin_decl
423 #undef TARGET_INIT_LIBFUNCS
424 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
426 #undef TARGET_PROMOTE_FUNCTION_MODE
427 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
428 #undef TARGET_PROMOTE_PROTOTYPES
429 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
430 #undef TARGET_PASS_BY_REFERENCE
431 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
432 #undef TARGET_ARG_PARTIAL_BYTES
433 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
434 #undef TARGET_FUNCTION_ARG
435 #define TARGET_FUNCTION_ARG arm_function_arg
436 #undef TARGET_FUNCTION_ARG_ADVANCE
437 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
438 #undef TARGET_FUNCTION_ARG_BOUNDARY
439 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
441 #undef TARGET_SETUP_INCOMING_VARARGS
442 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
444 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
445 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
447 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
448 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
449 #undef TARGET_TRAMPOLINE_INIT
450 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
451 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
452 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
454 #undef TARGET_DEFAULT_SHORT_ENUMS
455 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
457 #undef TARGET_ALIGN_ANON_BITFIELD
458 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
460 #undef TARGET_NARROW_VOLATILE_BITFIELD
461 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
463 #undef TARGET_CXX_GUARD_TYPE
464 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
466 #undef TARGET_CXX_GUARD_MASK_BIT
467 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
469 #undef TARGET_CXX_GET_COOKIE_SIZE
470 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
472 #undef TARGET_CXX_COOKIE_HAS_SIZE
473 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
475 #undef TARGET_CXX_CDTOR_RETURNS_THIS
476 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
478 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
479 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
481 #undef TARGET_CXX_USE_AEABI_ATEXIT
482 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
484 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
485 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
486 arm_cxx_determine_class_data_visibility
488 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
489 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
491 #undef TARGET_RETURN_IN_MSB
492 #define TARGET_RETURN_IN_MSB arm_return_in_msb
494 #undef TARGET_RETURN_IN_MEMORY
495 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
497 #undef TARGET_MUST_PASS_IN_STACK
498 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
501 #undef TARGET_ASM_UNWIND_EMIT
502 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
504 /* EABI unwinding tables use a different format for the typeinfo tables. */
505 #undef TARGET_ASM_TTYPE
506 #define TARGET_ASM_TTYPE arm_output_ttype
508 #undef TARGET_ARM_EABI_UNWINDER
509 #define TARGET_ARM_EABI_UNWINDER true
511 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
512 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
514 #undef TARGET_ASM_INIT_SECTIONS
515 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
516 #endif /* ARM_UNWIND_INFO */
518 #undef TARGET_EXCEPT_UNWIND_INFO
519 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
521 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
522 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
524 #undef TARGET_DWARF_REGISTER_SPAN
525 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
527 #undef TARGET_CANNOT_COPY_INSN_P
528 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
531 #undef TARGET_HAVE_TLS
532 #define TARGET_HAVE_TLS true
535 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
536 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
538 #undef TARGET_LEGITIMATE_CONSTANT_P
539 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
541 #undef TARGET_CANNOT_FORCE_CONST_MEM
542 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
544 #undef TARGET_MAX_ANCHOR_OFFSET
545 #define TARGET_MAX_ANCHOR_OFFSET 4095
547 /* The minimum is set such that the total size of the block
548 for a particular anchor is -4088 + 1 + 4095 bytes, which is
549 divisible by eight, ensuring natural spacing of anchors. */
550 #undef TARGET_MIN_ANCHOR_OFFSET
551 #define TARGET_MIN_ANCHOR_OFFSET -4088
553 #undef TARGET_SCHED_ISSUE_RATE
554 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
556 #undef TARGET_MANGLE_TYPE
557 #define TARGET_MANGLE_TYPE arm_mangle_type
559 #undef TARGET_BUILD_BUILTIN_VA_LIST
560 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
561 #undef TARGET_EXPAND_BUILTIN_VA_START
562 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
563 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
564 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
567 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
568 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
571 #undef TARGET_LEGITIMATE_ADDRESS_P
572 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
574 #undef TARGET_INVALID_PARAMETER_TYPE
575 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
577 #undef TARGET_INVALID_RETURN_TYPE
578 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
580 #undef TARGET_PROMOTED_TYPE
581 #define TARGET_PROMOTED_TYPE arm_promoted_type
583 #undef TARGET_CONVERT_TO_TYPE
584 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
586 #undef TARGET_SCALAR_MODE_SUPPORTED_P
587 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
589 #undef TARGET_FRAME_POINTER_REQUIRED
590 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
592 #undef TARGET_CAN_ELIMINATE
593 #define TARGET_CAN_ELIMINATE arm_can_eliminate
595 #undef TARGET_CONDITIONAL_REGISTER_USAGE
596 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
598 #undef TARGET_CLASS_LIKELY_SPILLED_P
599 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
601 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
602 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
603 arm_vector_alignment_reachable
605 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
606 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
607 arm_builtin_support_vector_misalignment
609 #undef TARGET_PREFERRED_RENAME_CLASS
610 #define TARGET_PREFERRED_RENAME_CLASS \
611 arm_preferred_rename_class
613 struct gcc_target targetm
= TARGET_INITIALIZER
;
615 /* Obstack for minipool constant handling. */
616 static struct obstack minipool_obstack
;
/* Marker for the start of the current object within minipool_obstack.  */
617 static char * minipool_startobj
;
619 /* The maximum number of insns skipped which
620 will be conditionalised if possible. */
621 static int max_insns_skipped
= 5;
623 extern FILE * asm_out_file
;
625 /* True if we are currently building a constant table. */
626 int making_const_table
;
628 /* The processor for which instructions should be scheduled. */
629 enum processor_type arm_tune
= arm_none
;
631 /* The current tuning set. */
632 const struct tune_params
*current_tune
;
634 /* Which floating point hardware to schedule for. */
/* NOTE(review): the declaration this comment describes is not visible in
   this chunk (original lines 635-636 are elided) -- confirm against the
   full file.  */
637 /* Which floating point hardware to use. */
638 const struct arm_fpu_desc
*arm_fpu_desc
;
640 /* Used for Thumb call_via trampolines. */
641 rtx thumb_call_via_label
[14];
642 static int thumb_call_reg_needed
;
644 /* Bit values used to identify processor capabilities. */
/* Each FL_* flag is a single bit in the insn_flags / tune_flags masks
   declared below.  */
645 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
646 #define FL_ARCH3M (1 << 1) /* Extended multiply */
647 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
648 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
649 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
650 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
651 #define FL_THUMB (1 << 6) /* Thumb aware */
652 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
653 #define FL_STRONG (1 << 8) /* StrongARM */
654 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
655 #define FL_XSCALE (1 << 10) /* XScale */
656 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
657 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
658 media instructions. */
659 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
660 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
661 Note: ARM6 & 7 derivatives only. */
662 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
663 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
/* NOTE(review): the comment opened on the FL_NOTM line below is not
   terminated in this chunk (original line 665 is elided); as the text
   stands, the defines up through FL_IWMMXT are swallowed by it --
   confirm against the full file.  */
664 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
666 #define FL_THUMB_DIV (1 << 18) /* Hardware divide (Thumb mode). */
667 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
668 #define FL_NEON (1 << 20) /* Neon instructions. */
669 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
671 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
672 #define FL_ARM_DIV (1 << 23) /* Hardware divide (ARM mode). */
674 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
/* NOTE(review): FL_TUNE's definition below ends in a backslash
   continuation whose remaining lines (original 678-679) are elided in
   this chunk -- confirm against the full file.  */
676 /* Flags that only affect tuning, not available instructions. */
677 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
680 #define FL_FOR_ARCH2 FL_NOTM
681 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
682 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
683 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
684 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
685 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
686 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
687 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
688 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
689 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
690 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
691 #define FL_FOR_ARCH6J FL_FOR_ARCH6
692 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
693 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
694 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
695 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
696 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
697 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
698 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
699 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_THUMB_DIV)
700 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_THUMB_DIV)
701 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
703 /* The bits in this mask specify which
704 instructions we are allowed to generate. */
705 static unsigned long insn_flags
= 0;
707 /* The bits in this mask specify which instruction scheduling options should
709 static unsigned long tune_flags
= 0;
711 /* The following are used in the arm.md file as equivalents to bits
712 in the above two flag variables. */
/* NOTE(review): several of the comments below describe variables whose
   declarations are not visible in this chunk (the original lines holding
   e.g. arm_arch3m, arm_arch4 are elided) -- confirm against the full
   file.  */
714 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
717 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
720 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
723 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
726 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
729 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
732 /* Nonzero if this chip supports the ARM 6K extensions. */
735 /* Nonzero if this chip supports the ARM 7 extensions. */
738 /* Nonzero if instructions not present in the 'M' profile can be used. */
739 int arm_arch_notm
= 0;
741 /* Nonzero if instructions present in ARMv7E-M can be used. */
744 /* Nonzero if this chip can benefit from load scheduling. */
745 int arm_ld_sched
= 0;
747 /* Nonzero if this chip is a StrongARM. */
748 int arm_tune_strongarm
= 0;
750 /* Nonzero if this chip is a Cirrus variant. */
751 int arm_arch_cirrus
= 0;
753 /* Nonzero if this chip supports Intel Wireless MMX technology. */
754 int arm_arch_iwmmxt
= 0;
756 /* Nonzero if this chip is an XScale. */
757 int arm_arch_xscale
= 0;
759 /* Nonzero if tuning for XScale */
760 int arm_tune_xscale
= 0;
762 /* Nonzero if we want to tune for stores that access the write-buffer.
763 This typically means an ARM6 or ARM7 with MMU or MPU. */
764 int arm_tune_wbuf
= 0;
766 /* Nonzero if tuning for Cortex-A9. */
767 int arm_tune_cortex_a9
= 0;
769 /* Nonzero if generating Thumb instructions. */
772 /* Nonzero if generating Thumb-1 instructions. */
775 /* Nonzero if we should define __THUMB_INTERWORK__ in the
777 XXX This is a bit of a hack, it's intended to help work around
778 problems in GLD which doesn't understand that armv5t code is
779 interworking clean. */
780 int arm_cpp_interwork
= 0;
782 /* Nonzero if chip supports Thumb 2. */
785 /* Nonzero if chip supports integer division instruction. */
786 int arm_arch_arm_hwdiv
;
787 int arm_arch_thumb_hwdiv
;
789 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
790 we must report the mode of the memory reference from
791 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
792 enum machine_mode output_memory_reference_mode
;
794 /* The register number to be used for the PIC offset register. */
795 unsigned arm_pic_register
= INVALID_REGNUM
;
797 /* Set to 1 after arm_reorg has started. Reset to start at the start of
798 the next function. */
799 static int after_arm_reorg
= 0;
801 enum arm_pcs arm_pcs_default
;
803 /* For an explanation of these variables, see final_prescan_insn below. */
805 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
806 enum arm_cond_code arm_current_cc
;
809 int arm_target_label
;
810 /* The number of conditionally executed insns, including the current insn. */
811 int arm_condexec_count
= 0;
812 /* A bitmask specifying the patterns for the IT block.
813 Zero means do not output an IT block before this insn. */
814 int arm_condexec_mask
= 0;
815 /* The number of bits used in arm_condexec_mask. */
816 int arm_condexec_masklen
= 0;
/* NOTE(review): the two array initializers below are missing their
   opening and closing braces in this chunk (original lines 820, 823-824,
   827 and 829-830 are elided) -- confirm against the full file.  */
818 /* The condition codes of the ARM, and the inverse function. */
819 static const char * const arm_condition_codes
[] =
821 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
822 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
825 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
826 int arm_regs_in_sequence
[] =
828 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
/* Unified assembler syntax spells the shift "lsl"; divided syntax uses
   the legacy "asl" spelling.  */
831 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
832 #define streq(string1, string2) (strcmp (string1, string2) == 0)
/* Mask of low registers usable as Thumb-2 work registers: r0-r7 minus
   the frame pointer, SP, PC and the PIC register.  */
834 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
835 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
836 | (1 << PIC_OFFSET_TABLE_REGNUM)))
838 /* Initialization code. */
842 const char *const name
;
843 enum processor_type core
;
845 const unsigned long flags
;
846 const struct tune_params
*const tune
;
850 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
851 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
856 const struct tune_params arm_slowmul_tune
=
858 arm_slowmul_rtx_costs
,
860 3, /* Constant limit. */
861 ARM_PREFETCH_NOT_BENEFICIAL
,
862 true, /* Prefer constant pool. */
863 arm_default_branch_cost
866 const struct tune_params arm_fastmul_tune
=
868 arm_fastmul_rtx_costs
,
870 1, /* Constant limit. */
871 ARM_PREFETCH_NOT_BENEFICIAL
,
872 true, /* Prefer constant pool. */
873 arm_default_branch_cost
876 const struct tune_params arm_xscale_tune
=
878 arm_xscale_rtx_costs
,
879 xscale_sched_adjust_cost
,
880 2, /* Constant limit. */
881 ARM_PREFETCH_NOT_BENEFICIAL
,
882 true, /* Prefer constant pool. */
883 arm_default_branch_cost
886 const struct tune_params arm_9e_tune
=
890 1, /* Constant limit. */
891 ARM_PREFETCH_NOT_BENEFICIAL
,
892 true, /* Prefer constant pool. */
893 arm_default_branch_cost
896 const struct tune_params arm_v6t2_tune
=
900 1, /* Constant limit. */
901 ARM_PREFETCH_NOT_BENEFICIAL
,
902 false, /* Prefer constant pool. */
903 arm_default_branch_cost
906 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
907 const struct tune_params arm_cortex_tune
=
911 1, /* Constant limit. */
912 ARM_PREFETCH_NOT_BENEFICIAL
,
913 false, /* Prefer constant pool. */
914 arm_default_branch_cost
917 const struct tune_params arm_cortex_a9_tune
=
920 cortex_a9_sched_adjust_cost
,
921 1, /* Constant limit. */
922 ARM_PREFETCH_BENEFICIAL(4,32,32),
923 false, /* Prefer constant pool. */
924 arm_default_branch_cost
927 const struct tune_params arm_fa726te_tune
=
930 fa726te_sched_adjust_cost
,
931 1, /* Constant limit. */
932 ARM_PREFETCH_NOT_BENEFICIAL
,
933 true, /* Prefer constant pool. */
934 arm_default_branch_cost
938 /* Not all of these give usefully different compilation alternatives,
939 but there is no simple way of generalizing them. */
940 static const struct processors all_cores
[] =
943 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
944 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
945 #include "arm-cores.def"
947 {NULL
, arm_none
, NULL
, 0, NULL
}
950 static const struct processors all_architectures
[] =
952 /* ARM Architectures */
953 /* We don't specify tuning costs here as it will be figured out
956 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
957 {NAME, CORE, #ARCH, FLAGS, NULL},
958 #include "arm-arches.def"
960 {NULL
, arm_none
, NULL
, 0 , NULL
}
964 /* These are populated as commandline arguments are processed, or NULL
966 static const struct processors
*arm_selected_arch
;
967 static const struct processors
*arm_selected_cpu
;
968 static const struct processors
*arm_selected_tune
;
970 /* The name of the preprocessor macro to define for this architecture. */
972 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
974 /* Available values for -mfpu=. */
976 static const struct arm_fpu_desc all_fpus
[] =
978 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
979 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
980 #include "arm-fpus.def"
985 /* Supported TLS relocations. */
995 /* The maximum number of insns to be used when loading a constant. */
997 arm_constant_limit (bool size_p
)
999 return size_p
? 1 : current_tune
->constant_limit
;
1002 /* Emit an insn that's a simple single-set. Both the operands must be known
1005 emit_set_insn (rtx x
, rtx y
)
1007 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE (Kernighan's method: each
   iteration clears the least-significant set bit, so the loop runs once
   per set bit rather than once per bit position).  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
1025 /* Set up library functions unique to ARM. */
1028 arm_init_libfuncs (void)
1030 /* There are no special library functions unless we are using the
1035 /* The functions below are described in Section 4 of the "Run-Time
1036 ABI for the ARM architecture", Version 1.0. */
1038 /* Double-precision floating-point arithmetic. Table 2. */
1039 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1040 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1041 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1042 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1043 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1045 /* Double-precision comparisons. Table 3. */
1046 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1047 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1048 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1049 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1050 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1051 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1052 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1054 /* Single-precision floating-point arithmetic. Table 4. */
1055 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1056 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1057 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1058 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1059 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1061 /* Single-precision comparisons. Table 5. */
1062 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1063 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1064 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1065 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1066 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1067 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1068 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1070 /* Floating-point to integer conversions. Table 6. */
1071 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1072 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1073 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1074 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1075 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1076 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1077 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1078 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1080 /* Conversions between floating types. Table 7. */
1081 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1082 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1084 /* Integer to floating-point conversions. Table 8. */
1085 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1086 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1087 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1088 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1089 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1090 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1091 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1092 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1094 /* Long long. Table 9. */
1095 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1096 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1097 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1098 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1099 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1100 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1101 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1102 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1104 /* Integer (32/32->32) division. \S 4.3.1. */
1105 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1106 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1108 /* The divmod functions are designed so that they can be used for
1109 plain division, even though they return both the quotient and the
1110 remainder. The quotient is returned in the usual location (i.e.,
1111 r0 for SImode, {r0, r1} for DImode), just as would be expected
1112 for an ordinary division routine. Because the AAPCS calling
1113 conventions specify that all of { r0, r1, r2, r3 } are
1114 callee-saved registers, there is no need to tell the compiler
1115 explicitly that those registers are clobbered by these
1117 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1118 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1120 /* For SImode division the ABI provides div-without-mod routines,
1121 which are faster. */
1122 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1123 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1125 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1126 divmod libcalls instead. */
1127 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1128 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1129 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1130 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1132 /* Half-precision float operations. The compiler handles all operations
1133 with NULL libfuncs by converting the SFmode. */
1134 switch (arm_fp16_format
)
1136 case ARM_FP16_FORMAT_IEEE
:
1137 case ARM_FP16_FORMAT_ALTERNATIVE
:
1140 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1141 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1143 : "__gnu_f2h_alternative"));
1144 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1145 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1147 : "__gnu_h2f_alternative"));
1150 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1151 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1152 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1153 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1154 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1157 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1158 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1159 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1160 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1161 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1162 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1163 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1170 if (TARGET_AAPCS_BASED
)
1171 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1174 /* On AAPCS systems, this is the "struct __va_list". */
1175 static GTY(()) tree va_list_type
;
1177 /* Return the type to use as __builtin_va_list. */
1179 arm_build_builtin_va_list (void)
1184 if (!TARGET_AAPCS_BASED
)
1185 return std_build_builtin_va_list ();
1187 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1195 The C Library ABI further reinforces this definition in \S
1198 We must follow this definition exactly. The structure tag
1199 name is visible in C++ mangled names, and thus forms a part
1200 of the ABI. The field name may be used by people who
1201 #include <stdarg.h>. */
1202 /* Create the type. */
1203 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1204 /* Give it the required name. */
1205 va_list_name
= build_decl (BUILTINS_LOCATION
,
1207 get_identifier ("__va_list"),
1209 DECL_ARTIFICIAL (va_list_name
) = 1;
1210 TYPE_NAME (va_list_type
) = va_list_name
;
1211 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
1212 /* Create the __ap field. */
1213 ap_field
= build_decl (BUILTINS_LOCATION
,
1215 get_identifier ("__ap"),
1217 DECL_ARTIFICIAL (ap_field
) = 1;
1218 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1219 TYPE_FIELDS (va_list_type
) = ap_field
;
1220 /* Compute its layout. */
1221 layout_type (va_list_type
);
1223 return va_list_type
;
1226 /* Return an expression of type "void *" pointing to the next
1227 available argument in a variable-argument list. VALIST is the
1228 user-level va_list object, of type __builtin_va_list. */
1230 arm_extract_valist_ptr (tree valist
)
1232 if (TREE_TYPE (valist
) == error_mark_node
)
1233 return error_mark_node
;
1235 /* On an AAPCS target, the pointer is stored within "struct
1237 if (TARGET_AAPCS_BASED
)
1239 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1240 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1241 valist
, ap_field
, NULL_TREE
);
1247 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1249 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1251 valist
= arm_extract_valist_ptr (valist
);
1252 std_expand_builtin_va_start (valist
, nextarg
);
1255 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1257 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1260 valist
= arm_extract_valist_ptr (valist
);
1261 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1264 /* Fix up any incompatible options that the user has specified. */
1266 arm_option_override (void)
1268 if (global_options_set
.x_arm_arch_option
)
1269 arm_selected_arch
= &all_architectures
[arm_arch_option
];
1271 if (global_options_set
.x_arm_cpu_option
)
1272 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
1274 if (global_options_set
.x_arm_tune_option
)
1275 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
1277 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1278 SUBTARGET_OVERRIDE_OPTIONS
;
1281 if (arm_selected_arch
)
1283 if (arm_selected_cpu
)
1285 /* Check for conflict between mcpu and march. */
1286 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1288 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1289 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1290 /* -march wins for code generation.
1291 -mcpu wins for default tuning. */
1292 if (!arm_selected_tune
)
1293 arm_selected_tune
= arm_selected_cpu
;
1295 arm_selected_cpu
= arm_selected_arch
;
1299 arm_selected_arch
= NULL
;
1302 /* Pick a CPU based on the architecture. */
1303 arm_selected_cpu
= arm_selected_arch
;
1306 /* If the user did not specify a processor, choose one for them. */
1307 if (!arm_selected_cpu
)
1309 const struct processors
* sel
;
1310 unsigned int sought
;
1312 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1313 if (!arm_selected_cpu
->name
)
1315 #ifdef SUBTARGET_CPU_DEFAULT
1316 /* Use the subtarget default CPU if none was specified by
1318 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1320 /* Default to ARM6. */
1321 if (!arm_selected_cpu
->name
)
1322 arm_selected_cpu
= &all_cores
[arm6
];
1325 sel
= arm_selected_cpu
;
1326 insn_flags
= sel
->flags
;
1328 /* Now check to see if the user has specified some command line
1329 switch that require certain abilities from the cpu. */
1332 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1334 sought
|= (FL_THUMB
| FL_MODE32
);
1336 /* There are no ARM processors that support both APCS-26 and
1337 interworking. Therefore we force FL_MODE26 to be removed
1338 from insn_flags here (if it was set), so that the search
1339 below will always be able to find a compatible processor. */
1340 insn_flags
&= ~FL_MODE26
;
1343 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1345 /* Try to locate a CPU type that supports all of the abilities
1346 of the default CPU, plus the extra abilities requested by
1348 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1349 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1352 if (sel
->name
== NULL
)
1354 unsigned current_bit_count
= 0;
1355 const struct processors
* best_fit
= NULL
;
1357 /* Ideally we would like to issue an error message here
1358 saying that it was not possible to find a CPU compatible
1359 with the default CPU, but which also supports the command
1360 line options specified by the programmer, and so they
1361 ought to use the -mcpu=<name> command line option to
1362 override the default CPU type.
1364 If we cannot find a cpu that has both the
1365 characteristics of the default cpu and the given
1366 command line options we scan the array again looking
1367 for a best match. */
1368 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1369 if ((sel
->flags
& sought
) == sought
)
1373 count
= bit_count (sel
->flags
& insn_flags
);
1375 if (count
>= current_bit_count
)
1378 current_bit_count
= count
;
1382 gcc_assert (best_fit
);
1386 arm_selected_cpu
= sel
;
1390 gcc_assert (arm_selected_cpu
);
1391 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1392 if (!arm_selected_tune
)
1393 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1395 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1396 insn_flags
= arm_selected_cpu
->flags
;
1398 arm_tune
= arm_selected_tune
->core
;
1399 tune_flags
= arm_selected_tune
->flags
;
1400 current_tune
= arm_selected_tune
->tune
;
1402 /* Make sure that the processor choice does not conflict with any of the
1403 other command line choices. */
1404 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1405 error ("target CPU does not support ARM mode");
1407 /* BPABI targets use linker tricks to allow interworking on cores
1408 without thumb support. */
1409 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1411 warning (0, "target CPU does not support interworking" );
1412 target_flags
&= ~MASK_INTERWORK
;
1415 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1417 warning (0, "target CPU does not support THUMB instructions");
1418 target_flags
&= ~MASK_THUMB
;
1421 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1423 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1424 target_flags
&= ~MASK_APCS_FRAME
;
1427 /* Callee super interworking implies thumb interworking. Adding
1428 this to the flags here simplifies the logic elsewhere. */
1429 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1430 target_flags
|= MASK_INTERWORK
;
1432 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1433 from here where no function is being compiled currently. */
1434 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1435 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1437 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1438 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1440 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1442 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1443 target_flags
|= MASK_APCS_FRAME
;
1446 if (TARGET_POKE_FUNCTION_NAME
)
1447 target_flags
|= MASK_APCS_FRAME
;
1449 if (TARGET_APCS_REENT
&& flag_pic
)
1450 error ("-fpic and -mapcs-reent are incompatible");
1452 if (TARGET_APCS_REENT
)
1453 warning (0, "APCS reentrant code not supported. Ignored");
1455 /* If this target is normally configured to use APCS frames, warn if they
1456 are turned off and debugging is turned on. */
1458 && write_symbols
!= NO_DEBUG
1459 && !TARGET_APCS_FRAME
1460 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1461 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1463 if (TARGET_APCS_FLOAT
)
1464 warning (0, "passing floating point arguments in fp regs not yet supported");
1466 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1467 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1468 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1469 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1470 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1471 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1472 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1473 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1474 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1475 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
1476 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
1477 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1478 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1479 arm_arch_cirrus
= (insn_flags
& FL_CIRRUS
) != 0;
1481 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1482 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1483 thumb_code
= TARGET_ARM
== 0;
1484 thumb1_code
= TARGET_THUMB1
!= 0;
1485 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1486 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1487 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1488 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
1489 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
1490 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
1492 /* If we are not using the default (ARM mode) section anchor offset
1493 ranges, then set the correct ranges now. */
1496 /* Thumb-1 LDR instructions cannot have negative offsets.
1497 Permissible positive offset ranges are 5-bit (for byte loads),
1498 6-bit (for halfword loads), or 7-bit (for word loads).
1499 Empirical results suggest a 7-bit anchor range gives the best
1500 overall code size. */
1501 targetm
.min_anchor_offset
= 0;
1502 targetm
.max_anchor_offset
= 127;
1504 else if (TARGET_THUMB2
)
1506 /* The minimum is set such that the total size of the block
1507 for a particular anchor is 248 + 1 + 4095 bytes, which is
1508 divisible by eight, ensuring natural spacing of anchors. */
1509 targetm
.min_anchor_offset
= -248;
1510 targetm
.max_anchor_offset
= 4095;
1513 /* V5 code we generate is completely interworking capable, so we turn off
1514 TARGET_INTERWORK here to avoid many tests later on. */
1516 /* XXX However, we must pass the right pre-processor defines to CPP
1517 or GLD can get confused. This is a hack. */
1518 if (TARGET_INTERWORK
)
1519 arm_cpp_interwork
= 1;
1522 target_flags
&= ~MASK_INTERWORK
;
1524 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1525 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1527 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1528 error ("iwmmxt abi requires an iwmmxt capable cpu");
1530 if (!global_options_set
.x_arm_fpu_index
)
1532 const char *target_fpu_name
;
1535 #ifdef FPUTYPE_DEFAULT
1536 target_fpu_name
= FPUTYPE_DEFAULT
;
1538 if (arm_arch_cirrus
)
1539 target_fpu_name
= "maverick";
1541 target_fpu_name
= "fpe2";
1544 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
1549 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
1551 switch (arm_fpu_desc
->model
)
1553 case ARM_FP_MODEL_FPA
:
1554 if (arm_fpu_desc
->rev
== 2)
1555 arm_fpu_attr
= FPU_FPE2
;
1556 else if (arm_fpu_desc
->rev
== 3)
1557 arm_fpu_attr
= FPU_FPE3
;
1559 arm_fpu_attr
= FPU_FPA
;
1562 case ARM_FP_MODEL_MAVERICK
:
1563 arm_fpu_attr
= FPU_MAVERICK
;
1566 case ARM_FP_MODEL_VFP
:
1567 arm_fpu_attr
= FPU_VFP
;
1574 if (TARGET_AAPCS_BASED
1575 && (arm_fpu_desc
->model
== ARM_FP_MODEL_FPA
))
1576 error ("FPA is unsupported in the AAPCS");
1578 if (TARGET_AAPCS_BASED
)
1580 if (TARGET_CALLER_INTERWORKING
)
1581 error ("AAPCS does not support -mcaller-super-interworking");
1583 if (TARGET_CALLEE_INTERWORKING
)
1584 error ("AAPCS does not support -mcallee-super-interworking");
1587 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1588 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1589 will ever exist. GCC makes no attempt to support this combination. */
1590 if (TARGET_IWMMXT
&& !TARGET_SOFT_FLOAT
)
1591 sorry ("iWMMXt and hardware floating point");
1593 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1594 if (TARGET_THUMB2
&& TARGET_IWMMXT
)
1595 sorry ("Thumb-2 iWMMXt");
1597 /* __fp16 support currently assumes the core has ldrh. */
1598 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1599 sorry ("__fp16 and no ldrh");
1601 /* If soft-float is specified then don't use FPU. */
1602 if (TARGET_SOFT_FLOAT
)
1603 arm_fpu_attr
= FPU_NONE
;
1605 if (TARGET_AAPCS_BASED
)
1607 if (arm_abi
== ARM_ABI_IWMMXT
)
1608 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
1609 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
1610 && TARGET_HARD_FLOAT
1612 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
1614 arm_pcs_default
= ARM_PCS_AAPCS
;
1618 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1619 sorry ("-mfloat-abi=hard and VFP");
1621 if (arm_abi
== ARM_ABI_APCS
)
1622 arm_pcs_default
= ARM_PCS_APCS
;
1624 arm_pcs_default
= ARM_PCS_ATPCS
;
1627 /* For arm2/3 there is no need to do any scheduling if there is only
1628 a floating point emulator, or we are doing software floating-point. */
1629 if ((TARGET_SOFT_FLOAT
1630 || (TARGET_FPA
&& arm_fpu_desc
->rev
))
1631 && (tune_flags
& FL_MODE32
) == 0)
1632 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
1634 /* Use the cp15 method if it is available. */
1635 if (target_thread_pointer
== TP_AUTO
)
1637 if (arm_arch6k
&& !TARGET_THUMB1
)
1638 target_thread_pointer
= TP_CP15
;
1640 target_thread_pointer
= TP_SOFT
;
1643 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1644 error ("can not use -mtp=cp15 with 16-bit Thumb");
1646 /* Override the default structure alignment for AAPCS ABI. */
1647 if (!global_options_set
.x_arm_structure_size_boundary
)
1649 if (TARGET_AAPCS_BASED
)
1650 arm_structure_size_boundary
= 8;
1654 if (arm_structure_size_boundary
!= 8
1655 && arm_structure_size_boundary
!= 32
1656 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
1658 if (ARM_DOUBLEWORD_ALIGN
)
1660 "structure size boundary can only be set to 8, 32 or 64");
1662 warning (0, "structure size boundary can only be set to 8 or 32");
1663 arm_structure_size_boundary
1664 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
1668 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1670 error ("RTP PIC is incompatible with Thumb");
1674 /* If stack checking is disabled, we can use r10 as the PIC register,
1675 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1676 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1678 if (TARGET_VXWORKS_RTP
)
1679 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1680 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1683 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1684 arm_pic_register
= 9;
1686 if (arm_pic_register_string
!= NULL
)
1688 int pic_register
= decode_reg_name (arm_pic_register_string
);
1691 warning (0, "-mpic-register= is useless without -fpic");
1693 /* Prevent the user from choosing an obviously stupid PIC register. */
1694 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1695 || pic_register
== HARD_FRAME_POINTER_REGNUM
1696 || pic_register
== STACK_POINTER_REGNUM
1697 || pic_register
>= PC_REGNUM
1698 || (TARGET_VXWORKS_RTP
1699 && (unsigned int) pic_register
!= arm_pic_register
))
1700 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1702 arm_pic_register
= pic_register
;
1705 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1706 if (fix_cm3_ldrd
== 2)
1708 if (arm_selected_cpu
->core
== cortexm3
)
1714 if (TARGET_THUMB1
&& flag_schedule_insns
)
1716 /* Don't warn since it's on by default in -O2. */
1717 flag_schedule_insns
= 0;
1722 /* If optimizing for size, bump the number of instructions that we
1723 are prepared to conditionally execute (even on a StrongARM). */
1724 max_insns_skipped
= 6;
1728 /* StrongARM has early execution of branches, so a sequence
1729 that is worth skipping is shorter. */
1730 if (arm_tune_strongarm
)
1731 max_insns_skipped
= 3;
1734 /* Hot/Cold partitioning is not currently supported, since we can't
1735 handle literal pool placement in that case. */
1736 if (flag_reorder_blocks_and_partition
)
1738 inform (input_location
,
1739 "-freorder-blocks-and-partition not supported on this architecture");
1740 flag_reorder_blocks_and_partition
= 0;
1741 flag_reorder_blocks
= 1;
1745 /* Hoisting PIC address calculations more aggressively provides a small,
1746 but measurable, size reduction for PIC code. Therefore, we decrease
1747 the bar for unrestricted expression hoisting to the cost of PIC address
1748 calculation, which is 2 instructions. */
1749 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
1750 global_options
.x_param_values
,
1751 global_options_set
.x_param_values
);
1753 /* ARM EABI defaults to strict volatile bitfields. */
1754 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0)
1755 flag_strict_volatile_bitfields
= 1;
1757 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
1758 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
1759 if (flag_prefetch_loop_arrays
< 0
1762 && current_tune
->num_prefetch_slots
> 0)
1763 flag_prefetch_loop_arrays
= 1;
1765 /* Set up parameters to be used in prefetching algorithm. Do not override the
1766 defaults unless we are tuning for a core we have researched values for. */
1767 if (current_tune
->num_prefetch_slots
> 0)
1768 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
1769 current_tune
->num_prefetch_slots
,
1770 global_options
.x_param_values
,
1771 global_options_set
.x_param_values
);
1772 if (current_tune
->l1_cache_line_size
>= 0)
1773 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
1774 current_tune
->l1_cache_line_size
,
1775 global_options
.x_param_values
,
1776 global_options_set
.x_param_values
);
1777 if (current_tune
->l1_cache_size
>= 0)
1778 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
1779 current_tune
->l1_cache_size
,
1780 global_options
.x_param_values
,
1781 global_options_set
.x_param_values
);
1783 /* Register global variables with the garbage collector. */
1784 arm_add_gc_roots ();
1788 arm_add_gc_roots (void)
1790 gcc_obstack_init(&minipool_obstack
);
1791 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
1794 /* A table of known ARM exception types.
1795 For use with the interrupt function attribute. */
1799 const char *const arg
;
1800 const unsigned long return_value
;
1804 static const isr_attribute_arg isr_attribute_args
[] =
1806 { "IRQ", ARM_FT_ISR
},
1807 { "irq", ARM_FT_ISR
},
1808 { "FIQ", ARM_FT_FIQ
},
1809 { "fiq", ARM_FT_FIQ
},
1810 { "ABORT", ARM_FT_ISR
},
1811 { "abort", ARM_FT_ISR
},
1812 { "ABORT", ARM_FT_ISR
},
1813 { "abort", ARM_FT_ISR
},
1814 { "UNDEF", ARM_FT_EXCEPTION
},
1815 { "undef", ARM_FT_EXCEPTION
},
1816 { "SWI", ARM_FT_EXCEPTION
},
1817 { "swi", ARM_FT_EXCEPTION
},
1818 { NULL
, ARM_FT_NORMAL
}
1821 /* Returns the (interrupt) function type of the current
1822 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1824 static unsigned long
1825 arm_isr_value (tree argument
)
1827 const isr_attribute_arg
* ptr
;
1831 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
1833 /* No argument - default to IRQ. */
1834 if (argument
== NULL_TREE
)
1837 /* Get the value of the argument. */
1838 if (TREE_VALUE (argument
) == NULL_TREE
1839 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
1840 return ARM_FT_UNKNOWN
;
1842 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
1844 /* Check it against the list of known arguments. */
1845 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
1846 if (streq (arg
, ptr
->arg
))
1847 return ptr
->return_value
;
1849 /* An unrecognized interrupt type. */
1850 return ARM_FT_UNKNOWN
;
/* NOTE(review): assembles the ARM_FT_* flag word for the current
   function: volatile (noreturn), nested (static chain), naked,
   isr/interrupt attributes, and interworking.  The extraction dropped
   lines (e.g. the head of the TREE_THIS_VOLATILE condition before
   original line 1869, and the branches between the two
   lookup_attribute calls), so the conditions are only partially
   visible.  Code preserved byte-identical.  */
1853 /* Computes the type of the current function. */
1855 static unsigned long
1856 arm_compute_func_type (void)
1858 unsigned long type
= ARM_FT_UNKNOWN
;
1862 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
1864 /* Decide if the current function is volatile. Such functions
1865 never return, and many memory cycles can be saved by not storing
1866 register values that will never be needed again. This optimization
1867 was added to speed up context switching in a kernel application. */
1869 && (TREE_NOTHROW (current_function_decl
)
1870 || !(flag_unwind_tables
1872 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
1873 && TREE_THIS_VOLATILE (current_function_decl
))
1874 type
|= ARM_FT_VOLATILE
;
1876 if (cfun
->static_chain_decl
!= NULL
)
1877 type
|= ARM_FT_NESTED
;
1879 attr
= DECL_ATTRIBUTES (current_function_decl
);
1881 a
= lookup_attribute ("naked", attr
);
1883 type
|= ARM_FT_NAKED
;
1885 a
= lookup_attribute ("isr", attr
);
1887 a
= lookup_attribute ("interrupt", attr
);
1890 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
1892 type
|= arm_isr_value (TREE_VALUE (a
));
/* NOTE(review): memoizing accessor -- computes the function type once
   via arm_compute_func_type and caches it in cfun->machine->func_type.
   Return type line is missing from this extraction (numbering jumps
   1897->1900); code preserved byte-identical.  */
1897 /* Returns the type of the current function. */
1900 arm_current_func_type (void)
1902 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
1903 cfun
->machine
->func_type
= arm_compute_func_type ();
1905 return cfun
->machine
->func_type
;
/* NOTE(review): target hook -- false for naked functions so no stack
   slots are allocated for incoming arguments; return-type/brace lines
   lost in extraction.  Code preserved byte-identical.  */
1909 arm_allocate_stack_slots_for_args (void)
1911 /* Naked functions should not allocate stack slots for arguments. */
1912 return !IS_NAKED (arm_current_func_type ());
/* NOTE(review): emits the fixed part of the nested-function trampoline
   for ARM / Thumb-2 / 16-bit Thumb into stream F, followed by two
   aligned zero words reserved for the chain value and target address
   (filled in by arm_trampoline_init below).  Branch structure is only
   partly visible: the extraction dropped the TARGET_ARM test and the
   enclosing braces (numbering jumps 1928->1932, 1941->1945).  Code
   preserved byte-identical.  */
1916 /* Output assembler code for a block containing the constant parts
1917 of a trampoline, leaving space for the variable parts.
1919 On the ARM, (if r8 is the static chain regnum, and remembering that
1920 referencing pc adds an offset of 8) the trampoline looks like:
1923 .word static chain value
1924 .word function's address
1925 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
1928 arm_asm_trampoline_template (FILE *f
)
1932 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
1933 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
1935 else if (TARGET_THUMB2
)
1937 /* The Thumb-2 trampoline is similar to the arm implementation.
1938 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
1939 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
1940 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
1941 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
1945 ASM_OUTPUT_ALIGN (f
, 2);
1946 fprintf (f
, "\t.code\t16\n");
1947 fprintf (f
, ".Ltrampoline_start:\n");
1948 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
1949 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
1950 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
1951 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
1952 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
1953 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
1955 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
1956 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
/* NOTE(review): TARGET_TRAMPOLINE_INIT hook.  Copies the template into
   M_TRAMP, stores CHAIN_VALUE and FNDECL's address at offsets 8/12
   (ARM/Thumb-2) or 12/16 (16-bit Thumb), then calls __clear_cache over
   the trampoline so the instruction cache sees the stores.  Code
   preserved byte-identical (extraction-mangled line breaks).  */
1959 /* Emit RTL insns to initialize the variable parts of a trampoline. */
1962 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
1964 rtx fnaddr
, mem
, a_tramp
;
1966 emit_block_move (m_tramp
, assemble_trampoline_template (),
1967 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
1969 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
1970 emit_move_insn (mem
, chain_value
);
1972 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
1973 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
1974 emit_move_insn (mem
, fnaddr
);
1976 a_tramp
= XEXP (m_tramp
, 0);
1977 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
1978 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
1979 plus_constant (a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
/* NOTE(review): ORs bit 0 into a trampoline address so a Thumb
   trampoline is entered in Thumb state.  The guarding TARGET_THUMB
   test and the return statement are missing from this extraction
   (numbering jumps 1986->1989); code preserved byte-identical.  */
1982 /* Thumb trampolines should be entered in thumb mode, so set
1983 the bottom bit of the address. */
1986 arm_trampoline_adjust_address (rtx addr
)
1989 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
1990 NULL
, 0, OPTAB_LIB_WIDEN
);
/* NOTE(review): decides whether the epilogue can be a single return
   instruction.  Rejects: pre-reload, naked/volatile/stack-align
   functions, frame-pointer or Thumb interrupts, variadic /
   eh-return / alloca functions, non-trivial stack adjustments, the
   SA-110 ldmib-sp erratum case, Thumb interworking with saved
   registers, expensive StrongARM conditional returns, LR-not-saved
   cases, and any pushed FPA/VFP/iWMMXt call-saved registers.  Many
   `return 0;`/`return 1;` lines and braces were dropped by the
   extraction (see jumps in the fused numbering, e.g. 2008->2011,
   2054->2056), so the individual outcomes are not visible here.
   Code preserved byte-identical.  */
1994 /* Return 1 if it is possible to return using a single instruction.
1995 If SIBLING is non-null, this is a test for a return before a sibling
1996 call. SIBLING is the call insn, so we can examine its register usage. */
1999 use_return_insn (int iscond
, rtx sibling
)
2002 unsigned int func_type
;
2003 unsigned long saved_int_regs
;
2004 unsigned HOST_WIDE_INT stack_adjust
;
2005 arm_stack_offsets
*offsets
;
2007 /* Never use a return instruction before reload has run. */
2008 if (!reload_completed
)
2011 func_type
= arm_current_func_type ();
2013 /* Naked, volatile and stack alignment functions need special
2015 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2018 /* So do interrupt functions that use the frame pointer and Thumb
2019 interrupt functions. */
2020 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2023 offsets
= arm_get_frame_offsets ();
2024 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2026 /* As do variadic functions. */
2027 if (crtl
->args
.pretend_args_size
2028 || cfun
->machine
->uses_anonymous_args
2029 /* Or if the function calls __builtin_eh_return () */
2030 || crtl
->calls_eh_return
2031 /* Or if the function calls alloca */
2032 || cfun
->calls_alloca
2033 /* Or if there is a stack adjustment. However, if the stack pointer
2034 is saved on the stack, we can use a pre-incrementing stack load. */
2035 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2036 && stack_adjust
== 4)))
2039 saved_int_regs
= offsets
->saved_regs_mask
;
2041 /* Unfortunately, the insn
2043 ldmib sp, {..., sp, ...}
2045 triggers a bug on most SA-110 based devices, such that the stack
2046 pointer won't be correctly restored if the instruction takes a
2047 page fault. We work around this problem by popping r3 along with
2048 the other registers, since that is never slower than executing
2049 another instruction.
2051 We test for !arm_arch5 here, because code for any architecture
2052 less than this could potentially be run on one of the buggy
2054 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2056 /* Validate that r3 is a call-clobbered register (always true in
2057 the default abi) ... */
2058 if (!call_used_regs
[3])
2061 /* ... that it isn't being used for a return value ... */
2062 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2065 /* ... or for a tail-call argument ... */
2068 gcc_assert (GET_CODE (sibling
) == CALL_INSN
);
2070 if (find_regno_fusage (sibling
, USE
, 3))
2074 /* ... and that there are no call-saved registers in r0-r2
2075 (always true in the default ABI). */
2076 if (saved_int_regs
& 0x7)
2080 /* Can't be done if interworking with Thumb, and any registers have been
2082 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2085 /* On StrongARM, conditional returns are expensive if they aren't
2086 taken and multiple registers have been stacked. */
2087 if (iscond
&& arm_tune_strongarm
)
2089 /* Conditional return when just the LR is stored is a simple
2090 conditional-load instruction, that's not expensive. */
2091 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2095 && arm_pic_register
!= INVALID_REGNUM
2096 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2100 /* If there are saved registers but the LR isn't saved, then we need
2101 two instructions for the return. */
2102 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2105 /* Can't be done if any of the FPA regs are pushed,
2106 since this also requires an insn. */
2107 if (TARGET_HARD_FLOAT
&& TARGET_FPA
)
2108 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
2109 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2112 /* Likewise VFP regs. */
2113 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2114 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2115 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2118 if (TARGET_REALLY_IWMMXT
)
2119 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2120 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
/* NOTE(review): tests whether I is encodable as an ARM data-processing
   immediate (an 8-bit value rotated by an even amount), with extra
   Thumb-2 cases (repeated byte patterns).  Several return statements,
   the Thumb-2/ARM mode tests and the `v` computation for the repeated
   patterns were dropped by the extraction (numbering jumps 2145->2148,
   2165->2172).  Code preserved byte-identical.  */
2126 /* Return TRUE if int I is a valid immediate ARM constant. */
2129 const_ok_for_arm (HOST_WIDE_INT i
)
2133 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2134 be all zero, or all one. */
2135 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2136 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2137 != ((~(unsigned HOST_WIDE_INT
) 0)
2138 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
2141 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2143 /* Fast return for 0 and small values. We must do this for zero, since
2144 the code below can't handle that one case. */
2145 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2148 /* Get the number of trailing zeros. */
2149 lowbit
= ffs((int) i
) - 1;
2151 /* Only even shifts are allowed in ARM mode so round down to the
2152 nearest even number. */
2156 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2161 /* Allow rotated constants in ARM mode. */
2163 && ((i
& ~0xc000003f) == 0
2164 || (i
& ~0xf000000f) == 0
2165 || (i
& ~0xfc000003) == 0))
2172 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2175 if (i
== v
|| i
== (v
| (v
<< 8)))
2178 /* Allow repeated pattern 0xXY00XY00. */
/* NOTE(review): like const_ok_for_arm, but per-operation: for some
   rtx codes the negated or complemented constant is also acceptable
   (e.g. add<->sub, and<->bic).  The switch statement and several case
   labels were dropped by the extraction (only the MINUS label
   survives); code preserved byte-identical.  */
2188 /* Return true if I is a valid constant for the operation CODE. */
2190 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
2192 if (const_ok_for_arm (i
))
2198 /* See if we can use movw. */
2199 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
2223 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2225 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
2231 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2235 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
/* NOTE(review): front end for arm_gen_constant -- if synthesizing VAL
   in-line would exceed arm_constant_limit, loads it from the constant
   pool (or with movw/movt) instead; otherwise delegates to
   arm_gen_constant.  Conditional-execution context is taken from
   INSN's COND_EXEC pattern.  Lines are missing throughout (e.g. the
   declaration of `cond`, the cost-comparison tail after 2281), so the
   full control flow is not visible.  Code preserved byte-identical.  */
2242 /* Emit a sequence of insns to handle a large constant.
2243 CODE is the code of the operation required, it can be any of SET, PLUS,
2244 IOR, AND, XOR, MINUS;
2245 MODE is the mode in which the operation is being performed;
2246 VAL is the integer to operate on;
2247 SOURCE is the other operand (a register, or a null-pointer for SET);
2248 SUBTARGETS means it is safe to create scratch registers if that will
2249 either produce a simpler sequence, or we will want to cse the values.
2250 Return value is the number of insns emitted. */
2252 /* ??? Tweak this for thumb2. */
2254 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2255 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2259 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2260 cond
= COND_EXEC_TEST (PATTERN (insn
));
2264 if (subtargets
|| code
== SET
2265 || (GET_CODE (target
) == REG
&& GET_CODE (source
) == REG
2266 && REGNO (target
) != REGNO (source
)))
2268 /* After arm_reorg has been called, we can't fix up expensive
2269 constants by pushing them into memory so we must synthesize
2270 them in-line, regardless of the cost. This is only likely to
2271 be more costly on chips that have load delay slots and we are
2272 compiling without running the scheduler (so no splitting
2273 occurred before the final instruction emission).
2275 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2277 if (!after_arm_reorg
2279 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2281 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2286 /* Currently SET is the only monadic value for CODE, all
2287 the rest are diadic. */
2288 if (TARGET_USE_MOVT
)
2289 arm_emit_movpair (target
, GEN_INT (val
));
2291 emit_set_insn (target
, GEN_INT (val
));
2297 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2299 if (TARGET_USE_MOVT
)
2300 arm_emit_movpair (temp
, GEN_INT (val
));
2302 emit_set_insn (temp
, GEN_INT (val
));
2304 /* For MINUS, the value is subtracted from, since we never
2305 have subtraction of a constant. */
2307 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2309 emit_set_insn (target
,
2310 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2316 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
/* NOTE(review): cost helper for find_best_start/arm_gen_constant --
   simulates peeling 8-bit chunks off REMAINDER starting at bit I and
   counts the resulting instructions.  The loop head, the `end`
   computation and the insn counter increment are missing from this
   extraction (numbering jumps 2329->2337, 2344->2349); code preserved
   byte-identical.  */
2320 /* Return the number of instructions required to synthesize the given
2321 constant, if we start emitting them from bit-position I. */
2323 count_insns_for_constant (HOST_WIDE_INT remainder
, int i
)
2325 HOST_WIDE_INT temp1
;
2326 int step_size
= TARGET_ARM
? 2 : 1;
2329 gcc_assert (TARGET_ARM
|| i
== 0);
2337 if (remainder
& (((1 << step_size
) - 1) << (i
- step_size
)))
2342 temp1
= remainder
& ((0x0ff << end
)
2343 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
2344 remainder
&= ~temp1
;
2349 } while (remainder
);
/* NOTE(review): picks the bit position from which to start emitting
   8-bit chunks of REMAINDER: the end of the largest 2-bit-aligned run
   of zeros, unless starting at bit 0 is no worse (to leave a small
   constant in the last insn for pre-indexed addressing).  Return
   statements and the `best_start` declaration are missing from this
   extraction; code preserved byte-identical.  */
2354 find_best_start (unsigned HOST_WIDE_INT remainder
)
2356 int best_consecutive_zeros
= 0;
2360 /* If we aren't targetting ARM, the best place to start is always at
2365 for (i
= 0; i
< 32; i
+= 2)
2367 int consecutive_zeros
= 0;
2369 if (!(remainder
& (3 << i
)))
2371 while ((i
< 32) && !(remainder
& (3 << i
)))
2373 consecutive_zeros
+= 2;
2376 if (consecutive_zeros
> best_consecutive_zeros
)
2378 best_consecutive_zeros
= consecutive_zeros
;
2379 best_start
= i
- consecutive_zeros
;
2385 /* So long as it won't require any more insns to do so, it's
2386 desirable to emit a small constant (in bits 0...9) in the last
2387 insn. This way there is more chance that it can be combined with
2388 a later addressing insn to form a pre-indexed load or store
2389 operation. Consider:
2391 *((volatile int *)0xe0000100) = 1;
2392 *((volatile int *)0xe0000110) = 2;
2394 We want this to wind up as:
2398 str rB, [rA, #0x100]
2400 str rB, [rA, #0x110]
2402 rather than having to synthesize both large constants from scratch.
2404 Therefore, we calculate how many insns would be required to emit
2405 the constant starting from `best_start', and also starting from
2406 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2407 yield a shorter sequence, we may as well use zero. */
2409 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < remainder
)
2410 && (count_insns_for_constant (remainder
, 0) <=
2411 count_insns_for_constant (remainder
, best_start
)))
/* NOTE(review): wraps PATTERN in a COND_EXEC on COND (when COND is
   non-NULL) before emitting; the `if (cond)` guard line is missing
   from this extraction.  Code preserved byte-identical.  */
2417 /* Emit an instruction with the indicated PATTERN. If COND is
2418 non-NULL, conditionalize the execution of the instruction on COND
2422 emit_constant_insn (rtx cond
, rtx pattern
)
2425 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2426 emit_insn (pattern
);
/* NOTE(review): the constant-synthesis workhorse.  Counts/Emits (per
   GENERATE) the insn sequence computing TARGET = SOURCE <CODE> VAL:
   degenerate cases first (0, 0xffffffff, single-insn immediates), then
   a battery of 2-insn strategies (sign-extended shifted constants,
   difference of two immediates, halfword replication, inverted
   constants, paired shifts), and finally the generic loop that peels
   8-bit chunks starting at find_best_start, optionally inverting or
   negating the constant first.  A very large number of original lines
   were dropped by the extraction (the fused numbering jumps constantly,
   e.g. 2453->2467, 2545->2557, 3053->3063), so switch labels, braces,
   returns and several whole strategies are invisible; nothing below
   has been restructured -- code preserved byte-identical.  */
2429 /* As above, but extra parameter GENERATE which, if clear, suppresses
2431 /* ??? This needs more work for thumb2. */
2434 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2435 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2440 int final_invert
= 0;
2442 int num_bits_set
= 0;
2443 int set_sign_bit_copies
= 0;
2444 int clear_sign_bit_copies
= 0;
2445 int clear_zero_bit_copies
= 0;
2446 int set_zero_bit_copies
= 0;
2448 unsigned HOST_WIDE_INT temp1
, temp2
;
2449 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2450 int step_size
= TARGET_ARM
? 2 : 1;
2452 /* Find out which operations are safe for a given CODE. Also do a quick
2453 check for degenerate cases; these can occur when DImode operations
2467 if (remainder
== 0xffffffff)
2470 emit_constant_insn (cond
,
2471 gen_rtx_SET (VOIDmode
, target
,
2472 GEN_INT (ARM_SIGN_EXTEND (val
))));
2478 if (reload_completed
&& rtx_equal_p (target
, source
))
2482 emit_constant_insn (cond
,
2483 gen_rtx_SET (VOIDmode
, target
, source
));
2492 emit_constant_insn (cond
,
2493 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2496 if (remainder
== 0xffffffff)
2498 if (reload_completed
&& rtx_equal_p (target
, source
))
2501 emit_constant_insn (cond
,
2502 gen_rtx_SET (VOIDmode
, target
, source
));
2511 if (reload_completed
&& rtx_equal_p (target
, source
))
2514 emit_constant_insn (cond
,
2515 gen_rtx_SET (VOIDmode
, target
, source
));
2519 if (remainder
== 0xffffffff)
2522 emit_constant_insn (cond
,
2523 gen_rtx_SET (VOIDmode
, target
,
2524 gen_rtx_NOT (mode
, source
)));
2530 /* We treat MINUS as (val - source), since (source - val) is always
2531 passed as (source + (-val)). */
2535 emit_constant_insn (cond
,
2536 gen_rtx_SET (VOIDmode
, target
,
2537 gen_rtx_NEG (mode
, source
)));
2540 if (const_ok_for_arm (val
))
2543 emit_constant_insn (cond
,
2544 gen_rtx_SET (VOIDmode
, target
,
2545 gen_rtx_MINUS (mode
, GEN_INT (val
),
2557 /* If we can do it in one insn get out quickly. */
2558 if (const_ok_for_op (val
, code
))
2561 emit_constant_insn (cond
,
2562 gen_rtx_SET (VOIDmode
, target
,
2564 ? gen_rtx_fmt_ee (code
, mode
, source
,
2570 /* Calculate a few attributes that may be useful for specific
2572 /* Count number of leading zeros. */
2573 for (i
= 31; i
>= 0; i
--)
2575 if ((remainder
& (1 << i
)) == 0)
2576 clear_sign_bit_copies
++;
2581 /* Count number of leading 1's. */
2582 for (i
= 31; i
>= 0; i
--)
2584 if ((remainder
& (1 << i
)) != 0)
2585 set_sign_bit_copies
++;
2590 /* Count number of trailing zero's. */
2591 for (i
= 0; i
<= 31; i
++)
2593 if ((remainder
& (1 << i
)) == 0)
2594 clear_zero_bit_copies
++;
2599 /* Count number of trailing 1's. */
2600 for (i
= 0; i
<= 31; i
++)
2602 if ((remainder
& (1 << i
)) != 0)
2603 set_zero_bit_copies
++;
2611 /* See if we can do this by sign_extending a constant that is known
2612 to be negative. This is a good, way of doing it, since the shift
2613 may well merge into a subsequent insn. */
2614 if (set_sign_bit_copies
> 1)
2616 if (const_ok_for_arm
2617 (temp1
= ARM_SIGN_EXTEND (remainder
2618 << (set_sign_bit_copies
- 1))))
2622 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2623 emit_constant_insn (cond
,
2624 gen_rtx_SET (VOIDmode
, new_src
,
2626 emit_constant_insn (cond
,
2627 gen_ashrsi3 (target
, new_src
,
2628 GEN_INT (set_sign_bit_copies
- 1)));
2632 /* For an inverted constant, we will need to set the low bits,
2633 these will be shifted out of harm's way. */
2634 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2635 if (const_ok_for_arm (~temp1
))
2639 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2640 emit_constant_insn (cond
,
2641 gen_rtx_SET (VOIDmode
, new_src
,
2643 emit_constant_insn (cond
,
2644 gen_ashrsi3 (target
, new_src
,
2645 GEN_INT (set_sign_bit_copies
- 1)));
2651 /* See if we can calculate the value as the difference between two
2652 valid immediates. */
2653 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2655 int topshift
= clear_sign_bit_copies
& ~1;
2657 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2658 & (0xff000000 >> topshift
));
2660 /* If temp1 is zero, then that means the 9 most significant
2661 bits of remainder were 1 and we've caused it to overflow.
2662 When topshift is 0 we don't need to do anything since we
2663 can borrow from 'bit 32'. */
2664 if (temp1
== 0 && topshift
!= 0)
2665 temp1
= 0x80000000 >> (topshift
- 1);
2667 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2669 if (const_ok_for_arm (temp2
))
2673 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2674 emit_constant_insn (cond
,
2675 gen_rtx_SET (VOIDmode
, new_src
,
2677 emit_constant_insn (cond
,
2678 gen_addsi3 (target
, new_src
,
2686 /* See if we can generate this by setting the bottom (or the top)
2687 16 bits, and then shifting these into the other half of the
2688 word. We only look for the simplest cases, to do more would cost
2689 too much. Be careful, however, not to generate this when the
2690 alternative would take fewer insns. */
2691 if (val
& 0xffff0000)
2693 temp1
= remainder
& 0xffff0000;
2694 temp2
= remainder
& 0x0000ffff;
2696 /* Overlaps outside this range are best done using other methods. */
2697 for (i
= 9; i
< 24; i
++)
2699 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2700 && !const_ok_for_arm (temp2
))
2702 rtx new_src
= (subtargets
2703 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2705 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2706 source
, subtargets
, generate
);
2714 gen_rtx_ASHIFT (mode
, source
,
2721 /* Don't duplicate cases already considered. */
2722 for (i
= 17; i
< 24; i
++)
2724 if (((temp1
| (temp1
>> i
)) == remainder
)
2725 && !const_ok_for_arm (temp1
))
2727 rtx new_src
= (subtargets
2728 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2730 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2731 source
, subtargets
, generate
);
2736 gen_rtx_SET (VOIDmode
, target
,
2739 gen_rtx_LSHIFTRT (mode
, source
,
2750 /* If we have IOR or XOR, and the constant can be loaded in a
2751 single instruction, and we can find a temporary to put it in,
2752 then this can be done in two instructions instead of 3-4. */
2754 /* TARGET can't be NULL if SUBTARGETS is 0 */
2755 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2757 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2761 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2763 emit_constant_insn (cond
,
2764 gen_rtx_SET (VOIDmode
, sub
,
2766 emit_constant_insn (cond
,
2767 gen_rtx_SET (VOIDmode
, target
,
2768 gen_rtx_fmt_ee (code
, mode
,
2779 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2780 and the remainder 0s for e.g. 0xfff00000)
2781 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2783 This can be done in 2 instructions by using shifts with mov or mvn.
2788 mvn r0, r0, lsr #12 */
2789 if (set_sign_bit_copies
> 8
2790 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2794 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2795 rtx shift
= GEN_INT (set_sign_bit_copies
);
2799 gen_rtx_SET (VOIDmode
, sub
,
2801 gen_rtx_ASHIFT (mode
,
2806 gen_rtx_SET (VOIDmode
, target
,
2808 gen_rtx_LSHIFTRT (mode
, sub
,
2815 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2817 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2819 For eg. r0 = r0 | 0xfff
2824 if (set_zero_bit_copies
> 8
2825 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
2829 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2830 rtx shift
= GEN_INT (set_zero_bit_copies
);
2834 gen_rtx_SET (VOIDmode
, sub
,
2836 gen_rtx_LSHIFTRT (mode
,
2841 gen_rtx_SET (VOIDmode
, target
,
2843 gen_rtx_ASHIFT (mode
, sub
,
2849 /* This will never be reached for Thumb2 because orn is a valid
2850 instruction. This is for Thumb1 and the ARM 32 bit cases.
2852 x = y | constant (such that ~constant is a valid constant)
2854 x = ~(~y & ~constant).
2856 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
2860 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2861 emit_constant_insn (cond
,
2862 gen_rtx_SET (VOIDmode
, sub
,
2863 gen_rtx_NOT (mode
, source
)));
2866 sub
= gen_reg_rtx (mode
);
2867 emit_constant_insn (cond
,
2868 gen_rtx_SET (VOIDmode
, sub
,
2869 gen_rtx_AND (mode
, source
,
2871 emit_constant_insn (cond
,
2872 gen_rtx_SET (VOIDmode
, target
,
2873 gen_rtx_NOT (mode
, sub
)));
2880 /* See if two shifts will do 2 or more insn's worth of work. */
2881 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
2883 HOST_WIDE_INT shift_mask
= ((0xffffffff
2884 << (32 - clear_sign_bit_copies
))
2887 if ((remainder
| shift_mask
) != 0xffffffff)
2891 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2892 insns
= arm_gen_constant (AND
, mode
, cond
,
2893 remainder
| shift_mask
,
2894 new_src
, source
, subtargets
, 1);
2899 rtx targ
= subtargets
? NULL_RTX
: target
;
2900 insns
= arm_gen_constant (AND
, mode
, cond
,
2901 remainder
| shift_mask
,
2902 targ
, source
, subtargets
, 0);
2908 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2909 rtx shift
= GEN_INT (clear_sign_bit_copies
);
2911 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
2912 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
2918 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
2920 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
2922 if ((remainder
| shift_mask
) != 0xffffffff)
2926 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2928 insns
= arm_gen_constant (AND
, mode
, cond
,
2929 remainder
| shift_mask
,
2930 new_src
, source
, subtargets
, 1);
2935 rtx targ
= subtargets
? NULL_RTX
: target
;
2937 insns
= arm_gen_constant (AND
, mode
, cond
,
2938 remainder
| shift_mask
,
2939 targ
, source
, subtargets
, 0);
2945 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2946 rtx shift
= GEN_INT (clear_zero_bit_copies
);
2948 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
2949 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
2961 for (i
= 0; i
< 32; i
++)
2962 if (remainder
& (1 << i
))
2965 if ((code
== AND
) || (can_invert
&& num_bits_set
> 16))
2966 remainder
^= 0xffffffff;
2967 else if (code
== PLUS
&& num_bits_set
> 16)
2968 remainder
= (-remainder
) & 0xffffffff;
2970 /* For XOR, if more than half the bits are set and there's a sequence
2971 of more than 8 consecutive ones in the pattern then we can XOR by the
2972 inverted constant and then invert the final result; this may save an
2973 instruction and might also lead to the final mvn being merged with
2974 some other operation. */
2975 else if (code
== XOR
&& num_bits_set
> 16
2976 && (count_insns_for_constant (remainder
^ 0xffffffff,
2978 (remainder
^ 0xffffffff))
2979 < count_insns_for_constant (remainder
,
2980 find_best_start (remainder
))))
2982 remainder
^= 0xffffffff;
2991 /* Now try and find a way of doing the job in either two or three
2993 We start by looking for the largest block of zeros that are aligned on
2994 a 2-bit boundary, we then fill up the temps, wrapping around to the
2995 top of the word when we drop off the bottom.
2996 In the worst case this code should produce no more than four insns.
2997 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2998 best place to start. */
3000 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3003 /* Now start emitting the insns. */
3004 i
= find_best_start (remainder
);
3011 if (remainder
& (3 << (i
- 2)))
3016 temp1
= remainder
& ((0x0ff << end
)
3017 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
3018 remainder
&= ~temp1
;
3022 rtx new_src
, temp1_rtx
;
3024 if (code
== SET
|| code
== MINUS
)
3026 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3027 if (can_invert
&& code
!= MINUS
)
3032 if ((final_invert
|| remainder
) && subtargets
)
3033 new_src
= gen_reg_rtx (mode
);
3038 else if (can_negate
)
3042 temp1
= trunc_int_for_mode (temp1
, mode
);
3043 temp1_rtx
= GEN_INT (temp1
);
3047 else if (code
== MINUS
)
3048 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3050 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3052 emit_constant_insn (cond
,
3053 gen_rtx_SET (VOIDmode
, new_src
,
3063 else if (code
== MINUS
)
3069 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3079 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3080 gen_rtx_NOT (mode
, source
)));
/* NOTE(review): rewrites comparisons against awkward integer constants
   into equivalent comparisons whose constant IS loadable: for DImode,
   GT/LE (and GTU/LEU in Thumb) become GE/LT (GEU/LTU) against
   constant+1, or the condition is reversed; for narrower modes,
   GT/GE/GTU/GEU and their duals are shifted by +/-1 when the adjusted
   constant (or its negation) satisfies const_ok_for_arm.  The switch
   dispatch, case labels, `i = INTVAL (*op1)` assignments and several
   guards (e.g. against maxval overflow) were dropped by the
   extraction; code preserved byte-identical.  */
3087 /* Canonicalize a comparison so that we are more likely to recognize it.
3088 This can be done for a few constant compares, where we can make the
3089 immediate value easier to load. */
3092 arm_canonicalize_comparison (enum rtx_code code
, rtx
*op0
, rtx
*op1
)
3094 enum machine_mode mode
;
3095 unsigned HOST_WIDE_INT i
, maxval
;
3097 mode
= GET_MODE (*op0
);
3098 if (mode
== VOIDmode
)
3099 mode
= GET_MODE (*op1
);
3101 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3103 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3104 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3105 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3106 for GTU/LEU in Thumb mode. */
3111 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3113 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
3116 if (code
== GT
|| code
== LE
3117 || (!TARGET_ARM
&& (code
== GTU
|| code
== LEU
)))
3119 /* Missing comparison. First try to use an available
3121 if (GET_CODE (*op1
) == CONST_INT
)
3129 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3131 *op1
= GEN_INT (i
+ 1);
3132 return code
== GT
? GE
: LT
;
3137 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3138 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3140 *op1
= GEN_INT (i
+ 1);
3141 return code
== GTU
? GEU
: LTU
;
3149 /* If that did not work, reverse the condition. */
3153 return swap_condition (code
);
3159 /* Comparisons smaller than DImode. Only adjust comparisons against
3160 an out-of-range constant. */
3161 if (GET_CODE (*op1
) != CONST_INT
3162 || const_ok_for_arm (INTVAL (*op1
))
3163 || const_ok_for_arm (- INTVAL (*op1
)))
3177 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3179 *op1
= GEN_INT (i
+ 1);
3180 return code
== GT
? GE
: LT
;
3187 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3189 *op1
= GEN_INT (i
- 1);
3190 return code
== GE
? GT
: LE
;
3196 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3197 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3199 *op1
= GEN_INT (i
+ 1);
3200 return code
== GTU
? GEU
: LTU
;
3207 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3209 *op1
= GEN_INT (i
- 1);
3210 return code
== GEU
? GTU
: LEU
;
/* NOTE(review): TARGET_FUNCTION_VALUE hook -- picks the register (via
   LIBCALL_VALUE) in which a value of TYPE is returned: AAPCS targets
   delegate to aapcs_allocate_return_reg; otherwise integer types are
   promoted and small big-endian-MSB structs are widened to a whole
   number of words first.  Return-type line missing from this
   extraction; code preserved byte-identical.  */
3222 /* Define how to find the value returned by a function. */
3225 arm_function_value(const_tree type
, const_tree func
,
3226 bool outgoing ATTRIBUTE_UNUSED
)
3228 enum machine_mode mode
;
3229 int unsignedp ATTRIBUTE_UNUSED
;
3230 rtx r ATTRIBUTE_UNUSED
;
3232 mode
= TYPE_MODE (type
);
3234 if (TARGET_AAPCS_BASED
)
3235 return aapcs_allocate_return_reg (mode
, type
, func
);
3237 /* Promote integer types. */
3238 if (INTEGRAL_TYPE_P (type
))
3239 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
3241 /* Promotes small structs returned in a register to full-word size
3242 for big-endian AAPCS. */
3243 if (arm_return_in_msb (type
))
3245 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3246 if (size
% UNITS_PER_WORD
!= 0)
3248 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
3249 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
3253 return LIBCALL_VALUE (mode
);
/* NOTE(review): htab equality callback -- rtx equality on the two
   stored libcall SYMBOL_REFs.  Return-type line lost in extraction;
   code preserved byte-identical.  */
3257 libcall_eq (const void *p1
, const void *p2
)
3259 return rtx_equal_p ((const_rtx
) p1
, (const_rtx
) p2
);
/* NOTE(review): htab hash callback for the libcall table -- hashes the
   stored rtx via hash_rtx.  Return-type line lost in extraction; code
   preserved byte-identical.  */
3263 libcall_hash (const void *p1
)
3265 return hash_rtx ((const_rtx
) p1
, VOIDmode
, NULL
, NULL
, FALSE
);
/* NOTE(review): inserts LIBCALL into HTAB (used by
   arm_libcall_uses_aapcs_base to build its lookup set).  Return-type
   line lost in extraction; code preserved byte-identical.  */
3269 add_libcall (htab_t htab
, rtx libcall
)
3271 *htab_find_slot (htab
, libcall
, INSERT
) = libcall
;
/* NOTE(review): lazily builds a hash table of the conversion libcalls
   (int<->float, half<->single) that use the AAPCS-base calling
   convention regardless of the default PCS, then answers membership
   for LIBCALL.  The `if (!init_done)` guard and init_done assignment
   were dropped by the extraction (numbering jumps 3278->3284) --
   presumably present in the original; initialization is NOT visibly
   thread-safe here.  Code preserved byte-identical.  */
3275 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3277 static bool init_done
= false;
3278 static htab_t libcall_htab
;
3284 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3286 add_libcall (libcall_htab
,
3287 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3288 add_libcall (libcall_htab
,
3289 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3290 add_libcall (libcall_htab
,
3291 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3292 add_libcall (libcall_htab
,
3293 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3295 add_libcall (libcall_htab
,
3296 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3297 add_libcall (libcall_htab
,
3298 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3299 add_libcall (libcall_htab
,
3300 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3301 add_libcall (libcall_htab
,
3302 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3304 add_libcall (libcall_htab
,
3305 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3306 add_libcall (libcall_htab
,
3307 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3308 add_libcall (libcall_htab
,
3309 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3310 add_libcall (libcall_htab
,
3311 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3312 add_libcall (libcall_htab
,
3313 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3314 add_libcall (libcall_htab
,
3315 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3318 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
/* NOTE(review): TARGET_LIBCALL_VALUE hook -- float-returning libcalls
   in the AAPCS-base set get their result in integer registers
   (ARG_REGISTER(1)) even on hard-float PCS; everything else falls
   through to LIBCALL_VALUE.  Code preserved byte-identical.  */
3322 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3324 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3325 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3327 /* The following libcalls return their result in integer registers,
3328 even though they return a floating point value. */
3329 if (arm_libcall_uses_aapcs_base (libcall
))
3330 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3334 return LIBCALL_VALUE (mode
);
/* NOTE(review): returns the byte count needed to hold every possible
   untyped-call return register; the accumulator variable and the
   size increments for the FPA/Maverick/iWMMXt branches were dropped
   by the extraction, leaving only the feature tests.  Code preserved
   byte-identical.  */
3337 /* Determine the amount of memory needed to store the possible return
3338 registers of an untyped call. */
3340 arm_apply_result_size (void)
3346 if (TARGET_HARD_FLOAT_ABI
)
3352 if (TARGET_MAVERICK
)
3355 if (TARGET_IWMMXT_ABI
)
3362 /* Decide whether TYPE should be returned in memory (true)
3363 or in a register (false). FNTYPE is the type of the function making
3366 arm_return_in_memory (const_tree type
, const_tree fntype
)
3370 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3372 if (TARGET_AAPCS_BASED
)
3374 /* Simple, non-aggregate types (ie not including vectors and
3375 complex) are always returned in a register (or registers).
3376 We don't care about which register here, so we can short-cut
3377 some of the detail. */
3378 if (!AGGREGATE_TYPE_P (type
)
3379 && TREE_CODE (type
) != VECTOR_TYPE
3380 && TREE_CODE (type
) != COMPLEX_TYPE
)
3383 /* Any return value that is no larger than one word can be
3385 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3388 /* Check any available co-processors to see if they accept the
3389 type as a register candidate (VFP, for example, can return
3390 some aggregates in consecutive registers). These aren't
3391 available if the call is variadic. */
3392 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3395 /* Vector values should be returned using ARM registers, not
3396 memory (unless they're over 16 bytes, which will break since
3397 we only have four call-clobbered registers to play with). */
3398 if (TREE_CODE (type
) == VECTOR_TYPE
)
3399 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3401 /* The rest go in memory. */
3405 if (TREE_CODE (type
) == VECTOR_TYPE
)
3406 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3408 if (!AGGREGATE_TYPE_P (type
) &&
3409 (TREE_CODE (type
) != VECTOR_TYPE
))
3410 /* All simple types are returned in registers. */
3413 if (arm_abi
!= ARM_ABI_APCS
)
3415 /* ATPCS and later return aggregate types in memory only if they are
3416 larger than a word (or are variable size). */
3417 return (size
< 0 || size
> UNITS_PER_WORD
);
3420 /* For the arm-wince targets we choose to be compatible with Microsoft's
3421 ARM and Thumb compilers, which always return aggregates in memory. */
3423 /* All structures/unions bigger than one word are returned in memory.
3424 Also catch the case where int_size_in_bytes returns -1. In this case
3425 the aggregate is either huge or of variable size, and in either case
3426 we will want to return it via memory and not in a register. */
3427 if (size
< 0 || size
> UNITS_PER_WORD
)
3430 if (TREE_CODE (type
) == RECORD_TYPE
)
3434 /* For a struct the APCS says that we only return in a register
3435 if the type is 'integer like' and every addressable element
3436 has an offset of zero. For practical purposes this means
3437 that the structure can have at most one non bit-field element
3438 and that this element must be the first one in the structure. */
3440 /* Find the first field, ignoring non FIELD_DECL things which will
3441 have been created by C++. */
3442 for (field
= TYPE_FIELDS (type
);
3443 field
&& TREE_CODE (field
) != FIELD_DECL
;
3444 field
= DECL_CHAIN (field
))
3448 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3450 /* Check that the first field is valid for returning in a register. */
3452 /* ... Floats are not allowed */
3453 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3456 /* ... Aggregates that are not themselves valid for returning in
3457 a register are not allowed. */
3458 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3461 /* Now check the remaining fields, if any. Only bitfields are allowed,
3462 since they are not addressable. */
3463 for (field
= DECL_CHAIN (field
);
3465 field
= DECL_CHAIN (field
))
3467 if (TREE_CODE (field
) != FIELD_DECL
)
3470 if (!DECL_BIT_FIELD_TYPE (field
))
3477 if (TREE_CODE (type
) == UNION_TYPE
)
3481 /* Unions can be returned in registers if every element is
3482 integral, or can be returned in an integer register. */
3483 for (field
= TYPE_FIELDS (type
);
3485 field
= DECL_CHAIN (field
))
3487 if (TREE_CODE (field
) != FIELD_DECL
)
3490 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3493 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3499 #endif /* not ARM_WINCE */
3501 /* Return all other types in memory. */
3505 /* Indicate whether or not words of a double are in big-endian order. */
3508 arm_float_words_big_endian (void)
3510 if (TARGET_MAVERICK
)
3513 /* For FPA, float words are always big-endian. For VFP, floats words
3514 follow the memory system mode. */
3522 return (TARGET_BIG_END
? 1 : 0);
3527 const struct pcs_attribute_arg
3531 } pcs_attribute_args
[] =
3533 {"aapcs", ARM_PCS_AAPCS
},
3534 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
3536 /* We could recognize these, but changes would be needed elsewhere
3537 * to implement them. */
3538 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
3539 {"atpcs", ARM_PCS_ATPCS
},
3540 {"apcs", ARM_PCS_APCS
},
3542 {NULL
, ARM_PCS_UNKNOWN
}
3546 arm_pcs_from_attribute (tree attr
)
3548 const struct pcs_attribute_arg
*ptr
;
3551 /* Get the value of the argument. */
3552 if (TREE_VALUE (attr
) == NULL_TREE
3553 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
3554 return ARM_PCS_UNKNOWN
;
3556 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
3558 /* Check it against the list of known arguments. */
3559 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3560 if (streq (arg
, ptr
->arg
))
3563 /* An unrecognized interrupt type. */
3564 return ARM_PCS_UNKNOWN
;
3567 /* Get the PCS variant to use for this call. TYPE is the function's type
3568 specification, DECL is the specific declartion. DECL may be null if
3569 the call could be indirect or if this is a library call. */
3571 arm_get_pcs_model (const_tree type
, const_tree decl
)
3573 bool user_convention
= false;
3574 enum arm_pcs user_pcs
= arm_pcs_default
;
3579 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
3582 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
3583 user_convention
= true;
3586 if (TARGET_AAPCS_BASED
)
3588 /* Detect varargs functions. These always use the base rules
3589 (no argument is ever a candidate for a co-processor
3591 bool base_rules
= stdarg_p (type
);
3593 if (user_convention
)
3595 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
3596 sorry ("non-AAPCS derived PCS variant");
3597 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
3598 error ("variadic functions must use the base AAPCS variant");
3602 return ARM_PCS_AAPCS
;
3603 else if (user_convention
)
3605 else if (decl
&& flag_unit_at_a_time
)
3607 /* Local functions never leak outside this compilation unit,
3608 so we are free to use whatever conventions are
3610 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3611 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3613 return ARM_PCS_AAPCS_LOCAL
;
3616 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
3617 sorry ("PCS variant");
3619 /* For everything else we use the target's default. */
3620 return arm_pcs_default
;
3625 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3626 const_tree fntype ATTRIBUTE_UNUSED
,
3627 rtx libcall ATTRIBUTE_UNUSED
,
3628 const_tree fndecl ATTRIBUTE_UNUSED
)
3630 /* Record the unallocated VFP registers. */
3631 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
3632 pcum
->aapcs_vfp_reg_alloc
= 0;
3635 /* Walk down the type tree of TYPE counting consecutive base elements.
3636 If *MODEP is VOIDmode, then set it to the first valid floating point
3637 type. If a non-floating point type is found, or if a floating point
3638 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3639 otherwise return the count in the sub-tree. */
3641 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
3643 enum machine_mode mode
;
3646 switch (TREE_CODE (type
))
3649 mode
= TYPE_MODE (type
);
3650 if (mode
!= DFmode
&& mode
!= SFmode
)
3653 if (*modep
== VOIDmode
)
3662 mode
= TYPE_MODE (TREE_TYPE (type
));
3663 if (mode
!= DFmode
&& mode
!= SFmode
)
3666 if (*modep
== VOIDmode
)
3675 /* Use V2SImode and V4SImode as representatives of all 64-bit
3676 and 128-bit vector types, whether or not those modes are
3677 supported with the present options. */
3678 size
= int_size_in_bytes (type
);
3691 if (*modep
== VOIDmode
)
3694 /* Vector modes are considered to be opaque: two vectors are
3695 equivalent for the purposes of being homogeneous aggregates
3696 if they are the same size. */
3705 tree index
= TYPE_DOMAIN (type
);
3707 /* Can't handle incomplete types. */
3708 if (!COMPLETE_TYPE_P(type
))
3711 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
3714 || !TYPE_MAX_VALUE (index
)
3715 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
3716 || !TYPE_MIN_VALUE (index
)
3717 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
3721 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
3722 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
3724 /* There must be no padding. */
3725 if (!host_integerp (TYPE_SIZE (type
), 1)
3726 || (tree_low_cst (TYPE_SIZE (type
), 1)
3727 != count
* GET_MODE_BITSIZE (*modep
)))
3739 /* Can't handle incomplete types. */
3740 if (!COMPLETE_TYPE_P(type
))
3743 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3745 if (TREE_CODE (field
) != FIELD_DECL
)
3748 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3754 /* There must be no padding. */
3755 if (!host_integerp (TYPE_SIZE (type
), 1)
3756 || (tree_low_cst (TYPE_SIZE (type
), 1)
3757 != count
* GET_MODE_BITSIZE (*modep
)))
3764 case QUAL_UNION_TYPE
:
3766 /* These aren't very interesting except in a degenerate case. */
3771 /* Can't handle incomplete types. */
3772 if (!COMPLETE_TYPE_P(type
))
3775 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3777 if (TREE_CODE (field
) != FIELD_DECL
)
3780 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3783 count
= count
> sub_count
? count
: sub_count
;
3786 /* There must be no padding. */
3787 if (!host_integerp (TYPE_SIZE (type
), 1)
3788 || (tree_low_cst (TYPE_SIZE (type
), 1)
3789 != count
* GET_MODE_BITSIZE (*modep
)))
3802 /* Return true if PCS_VARIANT should use VFP registers. */
3804 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
3806 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
3808 static bool seen_thumb1_vfp
= false;
3810 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
3812 sorry ("Thumb-1 hard-float VFP ABI");
3813 /* sorry() is not immediately fatal, so only display this once. */
3814 seen_thumb1_vfp
= true;
3820 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
3823 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
3824 (TARGET_VFP_DOUBLE
|| !is_double
));
3828 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
3829 enum machine_mode mode
, const_tree type
,
3830 enum machine_mode
*base_mode
, int *count
)
3832 enum machine_mode new_mode
= VOIDmode
;
3834 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
3835 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3836 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
3841 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3844 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
3846 else if (type
&& (mode
== BLKmode
|| TREE_CODE (type
) == VECTOR_TYPE
))
3848 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
3850 if (ag_count
> 0 && ag_count
<= 4)
3859 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
3862 *base_mode
= new_mode
;
3867 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
3868 enum machine_mode mode
, const_tree type
)
3870 int count ATTRIBUTE_UNUSED
;
3871 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
3873 if (!use_vfp_abi (pcs_variant
, false))
3875 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
3880 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3883 if (!use_vfp_abi (pcum
->pcs_variant
, false))
3886 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
3887 &pcum
->aapcs_vfp_rmode
,
3888 &pcum
->aapcs_vfp_rcount
);
3892 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3893 const_tree type ATTRIBUTE_UNUSED
)
3895 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
3896 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
3899 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
3900 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
3902 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
3903 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3906 int rcount
= pcum
->aapcs_vfp_rcount
;
3908 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
3912 /* Avoid using unsupported vector modes. */
3913 if (rmode
== V2SImode
)
3915 else if (rmode
== V4SImode
)
3922 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
3923 for (i
= 0; i
< rcount
; i
++)
3925 rtx tmp
= gen_rtx_REG (rmode
,
3926 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
3927 tmp
= gen_rtx_EXPR_LIST
3929 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
3930 XVECEXP (par
, 0, i
) = tmp
;
3933 pcum
->aapcs_reg
= par
;
3936 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
3943 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
3944 enum machine_mode mode
,
3945 const_tree type ATTRIBUTE_UNUSED
)
3947 if (!use_vfp_abi (pcs_variant
, false))
3950 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3953 enum machine_mode ag_mode
;
3958 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
3963 if (ag_mode
== V2SImode
)
3965 else if (ag_mode
== V4SImode
)
3971 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
3972 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
3973 for (i
= 0; i
< count
; i
++)
3975 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
3976 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
3977 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
3978 XVECEXP (par
, 0, i
) = tmp
;
3984 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
3988 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3989 enum machine_mode mode ATTRIBUTE_UNUSED
,
3990 const_tree type ATTRIBUTE_UNUSED
)
3992 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
3993 pcum
->aapcs_vfp_reg_alloc
= 0;
3997 #define AAPCS_CP(X) \
3999 aapcs_ ## X ## _cum_init, \
4000 aapcs_ ## X ## _is_call_candidate, \
4001 aapcs_ ## X ## _allocate, \
4002 aapcs_ ## X ## _is_return_candidate, \
4003 aapcs_ ## X ## _allocate_return_reg, \
4004 aapcs_ ## X ## _advance \
4007 /* Table of co-processors that can be used to pass arguments in
4008 registers. Idealy no arugment should be a candidate for more than
4009 one co-processor table entry, but the table is processed in order
4010 and stops after the first match. If that entry then fails to put
4011 the argument into a co-processor register, the argument will go on
4015 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4016 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4018 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4019 BLKmode) is a candidate for this co-processor's registers; this
4020 function should ignore any position-dependent state in
4021 CUMULATIVE_ARGS and only use call-type dependent information. */
4022 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4024 /* Return true if the argument does get a co-processor register; it
4025 should set aapcs_reg to an RTX of the register allocated as is
4026 required for a return from FUNCTION_ARG. */
4027 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4029 /* Return true if a result of mode MODE (or type TYPE if MODE is
4030 BLKmode) is can be returned in this co-processor's registers. */
4031 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4033 /* Allocate and return an RTX element to hold the return type of a
4034 call, this routine must not fail and will only be called if
4035 is_return_candidate returned true with the same parameters. */
4036 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4038 /* Finish processing this argument and prepare to start processing
4040 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4041 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4049 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4054 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4055 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4062 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4064 /* We aren't passed a decl, so we can't check that a call is local.
4065 However, it isn't clear that that would be a win anyway, since it
4066 might limit some tail-calling opportunities. */
4067 enum arm_pcs pcs_variant
;
4071 const_tree fndecl
= NULL_TREE
;
4073 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4076 fntype
= TREE_TYPE (fntype
);
4079 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4082 pcs_variant
= arm_pcs_default
;
4084 if (pcs_variant
!= ARM_PCS_AAPCS
)
4088 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4089 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4098 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4101 /* We aren't passed a decl, so we can't check that a call is local.
4102 However, it isn't clear that that would be a win anyway, since it
4103 might limit some tail-calling opportunities. */
4104 enum arm_pcs pcs_variant
;
4105 int unsignedp ATTRIBUTE_UNUSED
;
4109 const_tree fndecl
= NULL_TREE
;
4111 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4114 fntype
= TREE_TYPE (fntype
);
4117 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4120 pcs_variant
= arm_pcs_default
;
4122 /* Promote integer types. */
4123 if (type
&& INTEGRAL_TYPE_P (type
))
4124 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4126 if (pcs_variant
!= ARM_PCS_AAPCS
)
4130 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4131 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4133 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4137 /* Promotes small structs returned in a register to full-word size
4138 for big-endian AAPCS. */
4139 if (type
&& arm_return_in_msb (type
))
4141 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4142 if (size
% UNITS_PER_WORD
!= 0)
4144 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4145 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4149 return gen_rtx_REG (mode
, R0_REGNUM
);
4153 aapcs_libcall_value (enum machine_mode mode
)
4155 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4158 /* Lay out a function argument using the AAPCS rules. The rule
4159 numbers referred to here are those in the AAPCS. */
4161 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4162 const_tree type
, bool named
)
4167 /* We only need to do this once per argument. */
4168 if (pcum
->aapcs_arg_processed
)
4171 pcum
->aapcs_arg_processed
= true;
4173 /* Special case: if named is false then we are handling an incoming
4174 anonymous argument which is on the stack. */
4178 /* Is this a potential co-processor register candidate? */
4179 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4181 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4182 pcum
->aapcs_cprc_slot
= slot
;
4184 /* We don't have to apply any of the rules from part B of the
4185 preparation phase, these are handled elsewhere in the
4190 /* A Co-processor register candidate goes either in its own
4191 class of registers or on the stack. */
4192 if (!pcum
->aapcs_cprc_failed
[slot
])
4194 /* C1.cp - Try to allocate the argument to co-processor
4196 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4199 /* C2.cp - Put the argument on the stack and note that we
4200 can't assign any more candidates in this slot. We also
4201 need to note that we have allocated stack space, so that
4202 we won't later try to split a non-cprc candidate between
4203 core registers and the stack. */
4204 pcum
->aapcs_cprc_failed
[slot
] = true;
4205 pcum
->can_split
= false;
4208 /* We didn't get a register, so this argument goes on the
4210 gcc_assert (pcum
->can_split
== false);
4215 /* C3 - For double-word aligned arguments, round the NCRN up to the
4216 next even number. */
4217 ncrn
= pcum
->aapcs_ncrn
;
4218 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4221 nregs
= ARM_NUM_REGS2(mode
, type
);
4223 /* Sigh, this test should really assert that nregs > 0, but a GCC
4224 extension allows empty structs and then gives them empty size; it
4225 then allows such a structure to be passed by value. For some of
4226 the code below we have to pretend that such an argument has
4227 non-zero size so that we 'locate' it correctly either in
4228 registers or on the stack. */
4229 gcc_assert (nregs
>= 0);
4231 nregs2
= nregs
? nregs
: 1;
4233 /* C4 - Argument fits entirely in core registers. */
4234 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4236 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4237 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4241 /* C5 - Some core registers left and there are no arguments already
4242 on the stack: split this argument between the remaining core
4243 registers and the stack. */
4244 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4246 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4247 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4248 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4252 /* C6 - NCRN is set to 4. */
4253 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4255 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4259 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4260 for a call to a function whose data type is FNTYPE.
4261 For a library call, FNTYPE is NULL. */
4263 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4265 tree fndecl ATTRIBUTE_UNUSED
)
4267 /* Long call handling. */
4269 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4271 pcum
->pcs_variant
= arm_pcs_default
;
4273 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4275 if (arm_libcall_uses_aapcs_base (libname
))
4276 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4278 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4279 pcum
->aapcs_reg
= NULL_RTX
;
4280 pcum
->aapcs_partial
= 0;
4281 pcum
->aapcs_arg_processed
= false;
4282 pcum
->aapcs_cprc_slot
= -1;
4283 pcum
->can_split
= true;
4285 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4289 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4291 pcum
->aapcs_cprc_failed
[i
] = false;
4292 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4300 /* On the ARM, the offset starts at 0. */
4302 pcum
->iwmmxt_nregs
= 0;
4303 pcum
->can_split
= true;
4305 /* Varargs vectors are treated the same as long long.
4306 named_count avoids having to change the way arm handles 'named' */
4307 pcum
->named_count
= 0;
4310 if (TARGET_REALLY_IWMMXT
&& fntype
)
4314 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4316 fn_arg
= TREE_CHAIN (fn_arg
))
4317 pcum
->named_count
+= 1;
4319 if (! pcum
->named_count
)
4320 pcum
->named_count
= INT_MAX
;
4325 /* Return true if mode/type need doubleword alignment. */
4327 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
4329 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
4330 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
4334 /* Determine where to put an argument to a function.
4335 Value is zero to push the argument on the stack,
4336 or a hard register in which to store the argument.
4338 MODE is the argument's machine mode.
4339 TYPE is the data type of the argument (as a tree).
4340 This is null for libcalls where that information may
4342 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4343 the preceding args and about the function being called.
4344 NAMED is nonzero if this argument is a named parameter
4345 (otherwise it is an extra parameter matching an ellipsis).
4347 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4348 other arguments are passed on the stack. If (NAMED == 0) (which happens
4349 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4350 defined), say it is passed in the stack (function_prologue will
4351 indeed make it pass in the stack if necessary). */
4354 arm_function_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4355 const_tree type
, bool named
)
4359 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4360 a call insn (op3 of a call_value insn). */
4361 if (mode
== VOIDmode
)
4364 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4366 aapcs_layout_arg (pcum
, mode
, type
, named
);
4367 return pcum
->aapcs_reg
;
4370 /* Varargs vectors are treated the same as long long.
4371 named_count avoids having to change the way arm handles 'named' */
4372 if (TARGET_IWMMXT_ABI
4373 && arm_vector_mode_supported_p (mode
)
4374 && pcum
->named_count
> pcum
->nargs
+ 1)
4376 if (pcum
->iwmmxt_nregs
<= 9)
4377 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4380 pcum
->can_split
= false;
4385 /* Put doubleword aligned quantities in even register pairs. */
4387 && ARM_DOUBLEWORD_ALIGN
4388 && arm_needs_doubleword_align (mode
, type
))
4391 /* Only allow splitting an arg between regs and memory if all preceding
4392 args were allocated to regs. For args passed by reference we only count
4393 the reference pointer. */
4394 if (pcum
->can_split
)
4397 nregs
= ARM_NUM_REGS2 (mode
, type
);
4399 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4402 return gen_rtx_REG (mode
, pcum
->nregs
);
4406 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
4408 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
4409 ? DOUBLEWORD_ALIGNMENT
4414 arm_arg_partial_bytes (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4415 tree type
, bool named
)
4417 int nregs
= pcum
->nregs
;
4419 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4421 aapcs_layout_arg (pcum
, mode
, type
, named
);
4422 return pcum
->aapcs_partial
;
4425 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4428 if (NUM_ARG_REGS
> nregs
4429 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4431 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4436 /* Update the data in PCUM to advance over an argument
4437 of mode MODE and data type TYPE.
4438 (TYPE is null for libcalls where that information may not be available.) */
4441 arm_function_arg_advance (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4442 const_tree type
, bool named
)
4444 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4446 aapcs_layout_arg (pcum
, mode
, type
, named
);
4448 if (pcum
->aapcs_cprc_slot
>= 0)
4450 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4452 pcum
->aapcs_cprc_slot
= -1;
4455 /* Generic stuff. */
4456 pcum
->aapcs_arg_processed
= false;
4457 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4458 pcum
->aapcs_reg
= NULL_RTX
;
4459 pcum
->aapcs_partial
= 0;
4464 if (arm_vector_mode_supported_p (mode
)
4465 && pcum
->named_count
> pcum
->nargs
4466 && TARGET_IWMMXT_ABI
)
4467 pcum
->iwmmxt_nregs
+= 1;
4469 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
4473 /* Variable sized types are passed by reference. This is a GCC
4474 extension to the ARM ABI. */
4477 arm_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4478 enum machine_mode mode ATTRIBUTE_UNUSED
,
4479 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4481 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

/* Current pragma state; consulted when assigning default type
   attributes to new function types.  */
static arm_pragma_enum arm_pragma_long_calls = OFF;
4495 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4497 arm_pragma_long_calls
= LONG
;
4501 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4503 arm_pragma_long_calls
= SHORT
;
4507 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4509 arm_pragma_long_calls
= OFF
;
4512 /* Handle an attribute requiring a FUNCTION_DECL;
4513 arguments as in struct attribute_spec.handler. */
4515 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4516 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4518 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4520 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4522 *no_add_attrs
= true;
4528 /* Handle an "interrupt" or "isr" attribute;
4529 arguments as in struct attribute_spec.handler. */
4531 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
4536 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4538 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4540 *no_add_attrs
= true;
4542 /* FIXME: the argument if any is checked for type attributes;
4543 should it be checked for decl ones? */
4547 if (TREE_CODE (*node
) == FUNCTION_TYPE
4548 || TREE_CODE (*node
) == METHOD_TYPE
)
4550 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
4552 warning (OPT_Wattributes
, "%qE attribute ignored",
4554 *no_add_attrs
= true;
4557 else if (TREE_CODE (*node
) == POINTER_TYPE
4558 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
4559 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
4560 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
4562 *node
= build_variant_type_copy (*node
);
4563 TREE_TYPE (*node
) = build_type_attribute_variant
4565 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
4566 *no_add_attrs
= true;
4570 /* Possibly pass this attribute on from the type to a decl. */
4571 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
4572 | (int) ATTR_FLAG_FUNCTION_NEXT
4573 | (int) ATTR_FLAG_ARRAY_NEXT
))
4575 *no_add_attrs
= true;
4576 return tree_cons (name
, args
, NULL_TREE
);
4580 warning (OPT_Wattributes
, "%qE attribute ignored",
4589 /* Handle a "pcs" attribute; arguments as in struct
4590 attribute_spec.handler. */
4592 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
4593 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4595 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
4597 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
4598 *no_add_attrs
= true;
4603 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4604 /* Handle the "notshared" attribute. This attribute is another way of
4605 requesting hidden visibility. ARM's compiler supports
4606 "__declspec(notshared)"; we support the same thing via an
4610 arm_handle_notshared_attribute (tree
*node
,
4611 tree name ATTRIBUTE_UNUSED
,
4612 tree args ATTRIBUTE_UNUSED
,
4613 int flags ATTRIBUTE_UNUSED
,
4616 tree decl
= TYPE_NAME (*node
);
4620 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
4621 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
4622 *no_add_attrs
= false;
4628 /* Return 0 if the attributes for two types are incompatible, 1 if they
4629 are compatible, and 2 if they are nearly compatible (which causes a
4630 warning to be generated). */
4632 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
4636 /* Check for mismatch of non-default calling convention. */
4637 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
4640 /* Check for mismatched call attributes. */
4641 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4642 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4643 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4644 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4646 /* Only bother to check if an attribute is defined. */
4647 if (l1
| l2
| s1
| s2
)
4649 /* If one type has an attribute, the other must have the same attribute. */
4650 if ((l1
!= l2
) || (s1
!= s2
))
4653 /* Disallow mixed attributes. */
4654 if ((l1
& s2
) || (l2
& s1
))
4658 /* Check for mismatched ISR attribute. */
4659 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
4661 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
4662 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
4664 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
4671 /* Assigns default attributes to newly defined type. This is used to
4672 set short_call/long_call attributes for function types of
4673 functions defined inside corresponding #pragma scopes. */
4675 arm_set_default_type_attributes (tree type
)
4677 /* Add __attribute__ ((long_call)) to all functions, when
4678 inside #pragma long_calls or __attribute__ ((short_call)),
4679 when inside #pragma no_long_calls. */
4680 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
4682 tree type_attr_list
, attr_name
;
4683 type_attr_list
= TYPE_ATTRIBUTES (type
);
4685 if (arm_pragma_long_calls
== LONG
)
4686 attr_name
= get_identifier ("long_call");
4687 else if (arm_pragma_long_calls
== SHORT
)
4688 attr_name
= get_identifier ("short_call");
4692 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
4693 TYPE_ATTRIBUTES (type
) = type_attr_list
;
4697 /* Return true if DECL is known to be linked into section SECTION. */
4700 arm_function_in_section_p (tree decl
, section
*section
)
4702 /* We can only be certain about functions defined in the same
4703 compilation unit. */
4704 if (!TREE_STATIC (decl
))
4707 /* Make sure that SYMBOL always binds to the definition in this
4708 compilation unit. */
4709 if (!targetm
.binds_local_p (decl
))
4712 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4713 if (!DECL_SECTION_NAME (decl
))
4715 /* Make sure that we will not create a unique section for DECL. */
4716 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
4720 return function_section (decl
) == section
;
4723 /* Return nonzero if a 32-bit "long_call" should be generated for
4724 a call from the current function to DECL. We generate a long_call
4727 a. has an __attribute__((long call))
4728 or b. is within the scope of a #pragma long_calls
4729 or c. the -mlong-calls command line switch has been specified
4731 However we do not generate a long call if the function:
4733 d. has an __attribute__ ((short_call))
4734 or e. is inside the scope of a #pragma no_long_calls
4735 or f. is defined in the same section as the current function. */
4738 arm_is_long_call_p (tree decl
)
4743 return TARGET_LONG_CALLS
;
4745 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
4746 if (lookup_attribute ("short_call", attrs
))
4749 /* For "f", be conservative, and only cater for cases in which the
4750 whole of the current function is placed in the same section. */
4751 if (!flag_reorder_blocks_and_partition
4752 && TREE_CODE (decl
) == FUNCTION_DECL
4753 && arm_function_in_section_p (decl
, current_function_section ()))
4756 if (lookup_attribute ("long_call", attrs
))
4759 return TARGET_LONG_CALLS
;
4762 /* Return nonzero if it is ok to make a tail-call to DECL. */
4764 arm_function_ok_for_sibcall (tree decl
, tree exp
)
4766 unsigned long func_type
;
4768 if (cfun
->machine
->sibcall_blocked
)
4771 /* Never tailcall something for which we have no decl, or if we
4772 are generating code for Thumb-1. */
4773 if (decl
== NULL
|| TARGET_THUMB1
)
4776 /* The PIC register is live on entry to VxWorks PLT entries, so we
4777 must make the call before restoring the PIC register. */
4778 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
4781 /* Cannot tail-call to long calls, since these are out of range of
4782 a branch instruction. */
4783 if (arm_is_long_call_p (decl
))
4786 /* If we are interworking and the function is not declared static
4787 then we can't tail-call it unless we know that it exists in this
4788 compilation unit (since it might be a Thumb routine). */
4789 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
4792 func_type
= arm_current_func_type ();
4793 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4794 if (IS_INTERRUPT (func_type
))
4797 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4799 /* Check that the return value locations are the same. For
4800 example that we aren't returning a value from the sibling in
4801 a VFP register but then need to transfer it to a core
4805 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
4806 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4808 if (!rtx_equal_p (a
, b
))
4812 /* Never tailcall if function may be called with a misaligned SP. */
4813 if (IS_STACKALIGN (func_type
))
4816 /* Everything else is ok. */
4821 /* Addressing mode support functions. */
4823 /* Return nonzero if X is a legitimate immediate operand when compiling
4824 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4826 legitimate_pic_operand_p (rtx x
)
4828 if (GET_CODE (x
) == SYMBOL_REF
4829 || (GET_CODE (x
) == CONST
4830 && GET_CODE (XEXP (x
, 0)) == PLUS
4831 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
4837 /* Record that the current function needs a PIC register. Initialize
4838 cfun->machine->pic_reg if we have not already done so. */
4841 require_pic_register (void)
4843 /* A lot of the logic here is made obscure by the fact that this
4844 routine gets called as part of the rtx cost estimation process.
4845 We don't want those calls to affect any assumptions about the real
4846 function; and further, we can't call entry_of_function() until we
4847 start the real expansion process. */
4848 if (!crtl
->uses_pic_offset_table
)
4850 gcc_assert (can_create_pseudo_p ());
4851 if (arm_pic_register
!= INVALID_REGNUM
)
4853 if (!cfun
->machine
->pic_reg
)
4854 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
4856 /* Play games to avoid marking the function as needing pic
4857 if we are being called as part of the cost-estimation
4859 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4860 crtl
->uses_pic_offset_table
= 1;
4866 if (!cfun
->machine
->pic_reg
)
4867 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
4869 /* Play games to avoid marking the function as needing pic
4870 if we are being called as part of the cost-estimation
4872 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4874 crtl
->uses_pic_offset_table
= 1;
4877 arm_load_pic_register (0UL);
4882 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
4884 INSN_LOCATOR (insn
) = prologue_locator
;
4886 /* We can be called during expansion of PHI nodes, where
4887 we can't yet emit instructions directly in the final
4888 insn stream. Queue the insns on the entry edge, they will
4889 be committed after everything else is expanded. */
4890 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
4897 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
4899 if (GET_CODE (orig
) == SYMBOL_REF
4900 || GET_CODE (orig
) == LABEL_REF
)
4906 gcc_assert (can_create_pseudo_p ());
4907 reg
= gen_reg_rtx (Pmode
);
4910 /* VxWorks does not impose a fixed gap between segments; the run-time
4911 gap can be different from the object-file gap. We therefore can't
4912 use GOTOFF unless we are absolutely sure that the symbol is in the
4913 same segment as the GOT. Unfortunately, the flexibility of linker
4914 scripts means that we can't be sure of that in general, so assume
4915 that GOTOFF is never valid on VxWorks. */
4916 if ((GET_CODE (orig
) == LABEL_REF
4917 || (GET_CODE (orig
) == SYMBOL_REF
&&
4918 SYMBOL_REF_LOCAL_P (orig
)))
4920 && !TARGET_VXWORKS_RTP
)
4921 insn
= arm_pic_static_addr (orig
, reg
);
4927 /* If this function doesn't have a pic register, create one now. */
4928 require_pic_register ();
4930 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
4932 /* Make the MEM as close to a constant as possible. */
4933 mem
= SET_SRC (pat
);
4934 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
4935 MEM_READONLY_P (mem
) = 1;
4936 MEM_NOTRAP_P (mem
) = 1;
4938 insn
= emit_insn (pat
);
4941 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4943 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
4947 else if (GET_CODE (orig
) == CONST
)
4951 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4952 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
4955 /* Handle the case where we have: const (UNSPEC_TLS). */
4956 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
4957 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
4960 /* Handle the case where we have:
4961 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4963 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4964 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
4965 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
4967 gcc_assert (GET_CODE (XEXP (XEXP (orig
, 0), 1)) == CONST_INT
);
4973 gcc_assert (can_create_pseudo_p ());
4974 reg
= gen_reg_rtx (Pmode
);
4977 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
4979 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
4980 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
4981 base
== reg
? 0 : reg
);
4983 if (GET_CODE (offset
) == CONST_INT
)
4985 /* The base register doesn't really matter, we only want to
4986 test the index for the appropriate mode. */
4987 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
4989 gcc_assert (can_create_pseudo_p ());
4990 offset
= force_reg (Pmode
, offset
);
4993 if (GET_CODE (offset
) == CONST_INT
)
4994 return plus_constant (base
, INTVAL (offset
));
4997 if (GET_MODE_SIZE (mode
) > 4
4998 && (GET_MODE_CLASS (mode
) == MODE_INT
4999 || TARGET_SOFT_FLOAT
))
5001 emit_insn (gen_addsi3 (reg
, base
, offset
));
5005 return gen_rtx_PLUS (Pmode
, base
, offset
);
5012 /* Find a spare register to use during the prolog of a function. */
5015 thumb_find_work_register (unsigned long pushed_regs_mask
)
5019 /* Check the argument registers first as these are call-used. The
5020 register allocation order means that sometimes r3 might be used
5021 but earlier argument registers might not, so check them all. */
5022 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5023 if (!df_regs_ever_live_p (reg
))
5026 /* Before going on to check the call-saved registers we can try a couple
5027 more ways of deducing that r3 is available. The first is when we are
5028 pushing anonymous arguments onto the stack and we have less than 4
5029 registers worth of fixed arguments(*). In this case r3 will be part of
5030 the variable argument list and so we can be sure that it will be
5031 pushed right at the start of the function. Hence it will be available
5032 for the rest of the prologue.
5033 (*): ie crtl->args.pretend_args_size is greater than 0. */
5034 if (cfun
->machine
->uses_anonymous_args
5035 && crtl
->args
.pretend_args_size
> 0)
5036 return LAST_ARG_REGNUM
;
5038 /* The other case is when we have fixed arguments but less than 4 registers
5039 worth. In this case r3 might be used in the body of the function, but
5040 it is not being used to convey an argument into the function. In theory
5041 we could just check crtl->args.size to see how many bytes are
5042 being passed in argument registers, but it seems that it is unreliable.
5043 Sometimes it will have the value 0 when in fact arguments are being
5044 passed. (See testcase execute/20021111-1.c for an example). So we also
5045 check the args_info.nregs field as well. The problem with this field is
5046 that it makes no allowances for arguments that are passed to the
5047 function but which are not used. Hence we could miss an opportunity
5048 when a function has an unused argument in r3. But it is better to be
5049 safe than to be sorry. */
5050 if (! cfun
->machine
->uses_anonymous_args
5051 && crtl
->args
.size
>= 0
5052 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5053 && crtl
->args
.info
.nregs
< 4)
5054 return LAST_ARG_REGNUM
;
5056 /* Otherwise look for a call-saved register that is going to be pushed. */
5057 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5058 if (pushed_regs_mask
& (1 << reg
))
5063 /* Thumb-2 can use high regs. */
5064 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5065 if (pushed_regs_mask
& (1 << reg
))
5068 /* Something went wrong - thumb_compute_save_reg_mask()
5069 should have arranged for a suitable register to be pushed. */
5073 static GTY(()) int pic_labelno
;
5075 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5079 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5081 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5083 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5086 gcc_assert (flag_pic
);
5088 pic_reg
= cfun
->machine
->pic_reg
;
5089 if (TARGET_VXWORKS_RTP
)
5091 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5092 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5093 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5095 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5097 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5098 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5102 /* We use an UNSPEC rather than a LABEL_REF because this label
5103 never appears in the code stream. */
5105 labelno
= GEN_INT (pic_labelno
++);
5106 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5107 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5109 /* On the ARM the PC register contains 'dot + 8' at the time of the
5110 addition, on the Thumb it is 'dot + 4'. */
5111 pic_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5112 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5114 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5118 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5120 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
5122 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5124 else /* TARGET_THUMB1 */
5126 if (arm_pic_register
!= INVALID_REGNUM
5127 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5129 /* We will have pushed the pic register, so we should always be
5130 able to find a work register. */
5131 pic_tmp
= gen_rtx_REG (SImode
,
5132 thumb_find_work_register (saved_regs
));
5133 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5134 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5137 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
5138 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5142 /* Need to emit this whether or not we obey regdecls,
5143 since setjmp/longjmp can cause life info to screw up. */
5147 /* Generate code to load the address of a static var when flag_pic is set. */
5149 arm_pic_static_addr (rtx orig
, rtx reg
)
5151 rtx l1
, labelno
, offset_rtx
, insn
;
5153 gcc_assert (flag_pic
);
5155 /* We use an UNSPEC rather than a LABEL_REF because this label
5156 never appears in the code stream. */
5157 labelno
= GEN_INT (pic_labelno
++);
5158 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5159 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5161 /* On the ARM the PC register contains 'dot + 8' at the time of the
5162 addition, on the Thumb it is 'dot + 4'. */
5163 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5164 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5165 UNSPEC_SYMBOL_OFFSET
);
5166 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5170 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5172 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5174 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5176 else /* TARGET_THUMB1 */
5178 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5179 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5185 /* Return nonzero if X is valid as an ARM state addressing register. */
5187 arm_address_register_rtx_p (rtx x
, int strict_p
)
5191 if (GET_CODE (x
) != REG
)
5197 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5199 return (regno
<= LAST_ARM_REGNUM
5200 || regno
>= FIRST_PSEUDO_REGISTER
5201 || regno
== FRAME_POINTER_REGNUM
5202 || regno
== ARG_POINTER_REGNUM
);
5205 /* Return TRUE if this rtx is the difference of a symbol and a label,
5206 and will reduce to a PC-relative relocation in the object file.
5207 Expressions like this can be left alone when generating PIC, rather
5208 than forced through the GOT. */
5210 pcrel_constant_p (rtx x
)
5212 if (GET_CODE (x
) == MINUS
)
5213 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5218 /* Return true if X will surely end up in an index register after next
5221 will_be_in_index_register (const_rtx x
)
5223 /* arm.md: calculate_pic_address will split this into a register. */
5224 return GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_SYM
;
5227 /* Return nonzero if X is a valid ARM state address operand. */
5229 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5233 enum rtx_code code
= GET_CODE (x
);
5235 if (arm_address_register_rtx_p (x
, strict_p
))
5238 use_ldrd
= (TARGET_LDRD
5240 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5242 if (code
== POST_INC
|| code
== PRE_DEC
5243 || ((code
== PRE_INC
|| code
== POST_DEC
)
5244 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5245 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5247 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5248 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5249 && GET_CODE (XEXP (x
, 1)) == PLUS
5250 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5252 rtx addend
= XEXP (XEXP (x
, 1), 1);
5254 /* Don't allow ldrd post increment by register because it's hard
5255 to fixup invalid register choices. */
5257 && GET_CODE (x
) == POST_MODIFY
5258 && GET_CODE (addend
) == REG
)
5261 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5262 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5265 /* After reload constants split into minipools will have addresses
5266 from a LABEL_REF. */
5267 else if (reload_completed
5268 && (code
== LABEL_REF
5270 && GET_CODE (XEXP (x
, 0)) == PLUS
5271 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5272 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5275 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5278 else if (code
== PLUS
)
5280 rtx xop0
= XEXP (x
, 0);
5281 rtx xop1
= XEXP (x
, 1);
5283 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5284 && ((GET_CODE(xop1
) == CONST_INT
5285 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5286 || (!strict_p
&& will_be_in_index_register (xop1
))))
5287 || (arm_address_register_rtx_p (xop1
, strict_p
)
5288 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5292 /* Reload currently can't handle MINUS, so disable this for now */
5293 else if (GET_CODE (x
) == MINUS
)
5295 rtx xop0
= XEXP (x
, 0);
5296 rtx xop1
= XEXP (x
, 1);
5298 return (arm_address_register_rtx_p (xop0
, strict_p
)
5299 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5303 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5304 && code
== SYMBOL_REF
5305 && CONSTANT_POOL_ADDRESS_P (x
)
5307 && symbol_mentioned_p (get_pool_constant (x
))
5308 && ! pcrel_constant_p (get_pool_constant (x
))))
5314 /* Return nonzero if X is a valid Thumb-2 address operand. */
5316 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5319 enum rtx_code code
= GET_CODE (x
);
5321 if (arm_address_register_rtx_p (x
, strict_p
))
5324 use_ldrd
= (TARGET_LDRD
5326 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5328 if (code
== POST_INC
|| code
== PRE_DEC
5329 || ((code
== PRE_INC
|| code
== POST_DEC
)
5330 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5331 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5333 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5334 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5335 && GET_CODE (XEXP (x
, 1)) == PLUS
5336 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5338 /* Thumb-2 only has autoincrement by constant. */
5339 rtx addend
= XEXP (XEXP (x
, 1), 1);
5340 HOST_WIDE_INT offset
;
5342 if (GET_CODE (addend
) != CONST_INT
)
5345 offset
= INTVAL(addend
);
5346 if (GET_MODE_SIZE (mode
) <= 4)
5347 return (offset
> -256 && offset
< 256);
5349 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5350 && (offset
& 3) == 0);
5353 /* After reload constants split into minipools will have addresses
5354 from a LABEL_REF. */
5355 else if (reload_completed
5356 && (code
== LABEL_REF
5358 && GET_CODE (XEXP (x
, 0)) == PLUS
5359 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5360 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5363 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5366 else if (code
== PLUS
)
5368 rtx xop0
= XEXP (x
, 0);
5369 rtx xop1
= XEXP (x
, 1);
5371 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5372 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
5373 || (!strict_p
&& will_be_in_index_register (xop1
))))
5374 || (arm_address_register_rtx_p (xop1
, strict_p
)
5375 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5378 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5379 && code
== SYMBOL_REF
5380 && CONSTANT_POOL_ADDRESS_P (x
)
5382 && symbol_mentioned_p (get_pool_constant (x
))
5383 && ! pcrel_constant_p (get_pool_constant (x
))))
5389 /* Return nonzero if INDEX is valid for an address index operand in
5392 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5395 HOST_WIDE_INT range
;
5396 enum rtx_code code
= GET_CODE (index
);
5398 /* Standard coprocessor addressing modes. */
5399 if (TARGET_HARD_FLOAT
5400 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
5401 && (mode
== SFmode
|| mode
== DFmode
5402 || (TARGET_MAVERICK
&& mode
== DImode
)))
5403 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5404 && INTVAL (index
) > -1024
5405 && (INTVAL (index
) & 3) == 0);
5407 /* For quad modes, we restrict the constant offset to be slightly less
5408 than what the instruction format permits. We do this because for
5409 quad mode moves, we will actually decompose them into two separate
5410 double-mode reads or writes. INDEX must therefore be a valid
5411 (double-mode) offset and so should INDEX+8. */
5412 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5413 return (code
== CONST_INT
5414 && INTVAL (index
) < 1016
5415 && INTVAL (index
) > -1024
5416 && (INTVAL (index
) & 3) == 0);
5418 /* We have no such constraint on double mode offsets, so we permit the
5419 full range of the instruction format. */
5420 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5421 return (code
== CONST_INT
5422 && INTVAL (index
) < 1024
5423 && INTVAL (index
) > -1024
5424 && (INTVAL (index
) & 3) == 0);
5426 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5427 return (code
== CONST_INT
5428 && INTVAL (index
) < 1024
5429 && INTVAL (index
) > -1024
5430 && (INTVAL (index
) & 3) == 0);
5432 if (arm_address_register_rtx_p (index
, strict_p
)
5433 && (GET_MODE_SIZE (mode
) <= 4))
5436 if (mode
== DImode
|| mode
== DFmode
)
5438 if (code
== CONST_INT
)
5440 HOST_WIDE_INT val
= INTVAL (index
);
5443 return val
> -256 && val
< 256;
5445 return val
> -4096 && val
< 4092;
5448 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5451 if (GET_MODE_SIZE (mode
) <= 4
5455 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5459 rtx xiop0
= XEXP (index
, 0);
5460 rtx xiop1
= XEXP (index
, 1);
5462 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5463 && power_of_two_operand (xiop1
, SImode
))
5464 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5465 && power_of_two_operand (xiop0
, SImode
)));
5467 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5468 || code
== ASHIFT
|| code
== ROTATERT
)
5470 rtx op
= XEXP (index
, 1);
5472 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5473 && GET_CODE (op
) == CONST_INT
5475 && INTVAL (op
) <= 31);
5479 /* For ARM v4 we may be doing a sign-extend operation during the
5485 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5491 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5493 return (code
== CONST_INT
5494 && INTVAL (index
) < range
5495 && INTVAL (index
) > -range
);
5498 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5499 index operand. i.e. 1, 2, 4 or 8. */
5501 thumb2_index_mul_operand (rtx op
)
5505 if (GET_CODE(op
) != CONST_INT
)
5509 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5512 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5514 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5516 enum rtx_code code
= GET_CODE (index
);
5518 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5519 /* Standard coprocessor addressing modes. */
5520 if (TARGET_HARD_FLOAT
5521 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
5522 && (mode
== SFmode
|| mode
== DFmode
5523 || (TARGET_MAVERICK
&& mode
== DImode
)))
5524 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5525 /* Thumb-2 allows only > -256 index range for it's core register
5526 load/stores. Since we allow SF/DF in core registers, we have
5527 to use the intersection between -256~4096 (core) and -1024~1024
5529 && INTVAL (index
) > -256
5530 && (INTVAL (index
) & 3) == 0);
5532 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5534 /* For DImode assume values will usually live in core regs
5535 and only allow LDRD addressing modes. */
5536 if (!TARGET_LDRD
|| mode
!= DImode
)
5537 return (code
== CONST_INT
5538 && INTVAL (index
) < 1024
5539 && INTVAL (index
) > -1024
5540 && (INTVAL (index
) & 3) == 0);
5543 /* For quad modes, we restrict the constant offset to be slightly less
5544 than what the instruction format permits. We do this because for
5545 quad mode moves, we will actually decompose them into two separate
5546 double-mode reads or writes. INDEX must therefore be a valid
5547 (double-mode) offset and so should INDEX+8. */
5548 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5549 return (code
== CONST_INT
5550 && INTVAL (index
) < 1016
5551 && INTVAL (index
) > -1024
5552 && (INTVAL (index
) & 3) == 0);
5554 /* We have no such constraint on double mode offsets, so we permit the
5555 full range of the instruction format. */
5556 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5557 return (code
== CONST_INT
5558 && INTVAL (index
) < 1024
5559 && INTVAL (index
) > -1024
5560 && (INTVAL (index
) & 3) == 0);
5562 if (arm_address_register_rtx_p (index
, strict_p
)
5563 && (GET_MODE_SIZE (mode
) <= 4))
5566 if (mode
== DImode
|| mode
== DFmode
)
5568 if (code
== CONST_INT
)
5570 HOST_WIDE_INT val
= INTVAL (index
);
5571 /* ??? Can we assume ldrd for thumb2? */
5572 /* Thumb-2 ldrd only has reg+const addressing modes. */
5573 /* ldrd supports offsets of +-1020.
5574 However the ldr fallback does not. */
5575 return val
> -256 && val
< 256 && (val
& 3) == 0;
5583 rtx xiop0
= XEXP (index
, 0);
5584 rtx xiop1
= XEXP (index
, 1);
5586 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5587 && thumb2_index_mul_operand (xiop1
))
5588 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5589 && thumb2_index_mul_operand (xiop0
)));
5591 else if (code
== ASHIFT
)
5593 rtx op
= XEXP (index
, 1);
5595 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5596 && GET_CODE (op
) == CONST_INT
5598 && INTVAL (op
) <= 3);
5601 return (code
== CONST_INT
5602 && INTVAL (index
) < 4096
5603 && INTVAL (index
) > -256);
5606 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5608 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
5612 if (GET_CODE (x
) != REG
)
5618 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
5620 return (regno
<= LAST_LO_REGNUM
5621 || regno
> LAST_VIRTUAL_REGISTER
5622 || regno
== FRAME_POINTER_REGNUM
5623 || (GET_MODE_SIZE (mode
) >= 4
5624 && (regno
== STACK_POINTER_REGNUM
5625 || regno
>= FIRST_PSEUDO_REGISTER
5626 || x
== hard_frame_pointer_rtx
5627 || x
== arg_pointer_rtx
)));
5630 /* Return nonzero if x is a legitimate index register. This is the case
5631 for any base register that can access a QImode object. */
5633 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
5635 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
5638 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5640 The AP may be eliminated to either the SP or the FP, so we use the
5641 least common denominator, e.g. SImode, and offsets from 0 to 64.
5643 ??? Verify whether the above is the right approach.
5645 ??? Also, the FP may be eliminated to the SP, so perhaps that
5646 needs special handling also.
5648 ??? Look at how the mips16 port solves this problem. It probably uses
5649 better ways to solve some of these problems.
5651 Although it is not incorrect, we don't accept QImode and HImode
5652 addresses based on the frame pointer or arg pointer until the
5653 reload pass starts. This is so that eliminating such addresses
5654 into stack based ones won't produce impossible code. */
5656 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5658 /* ??? Not clear if this is right. Experiment. */
5659 if (GET_MODE_SIZE (mode
) < 4
5660 && !(reload_in_progress
|| reload_completed
)
5661 && (reg_mentioned_p (frame_pointer_rtx
, x
)
5662 || reg_mentioned_p (arg_pointer_rtx
, x
)
5663 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
5664 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
5665 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
5666 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
5669 /* Accept any base register. SP only in SImode or larger. */
5670 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
5673 /* This is PC relative data before arm_reorg runs. */
5674 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
5675 && GET_CODE (x
) == SYMBOL_REF
5676 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
5679 /* This is PC relative data after arm_reorg runs. */
5680 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
5682 && (GET_CODE (x
) == LABEL_REF
5683 || (GET_CODE (x
) == CONST
5684 && GET_CODE (XEXP (x
, 0)) == PLUS
5685 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5686 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5689 /* Post-inc indexing only supported for SImode and larger. */
5690 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
5691 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
5694 else if (GET_CODE (x
) == PLUS
)
5696 /* REG+REG address can be any two index registers. */
5697 /* We disallow FRAME+REG addressing since we know that FRAME
5698 will be replaced with STACK, and SP relative addressing only
5699 permits SP+OFFSET. */
5700 if (GET_MODE_SIZE (mode
) <= 4
5701 && XEXP (x
, 0) != frame_pointer_rtx
5702 && XEXP (x
, 1) != frame_pointer_rtx
5703 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5704 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
5705 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
5708 /* REG+const has 5-7 bit offset for non-SP registers. */
5709 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5710 || XEXP (x
, 0) == arg_pointer_rtx
)
5711 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5712 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
5715 /* REG+const has 10-bit offset for SP, but only SImode and
5716 larger is supported. */
5717 /* ??? Should probably check for DI/DFmode overflow here
5718 just like GO_IF_LEGITIMATE_OFFSET does. */
5719 else if (GET_CODE (XEXP (x
, 0)) == REG
5720 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
5721 && GET_MODE_SIZE (mode
) >= 4
5722 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5723 && INTVAL (XEXP (x
, 1)) >= 0
5724 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
5725 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5728 else if (GET_CODE (XEXP (x
, 0)) == REG
5729 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
5730 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
5731 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
5732 && REGNO (XEXP (x
, 0))
5733 <= LAST_VIRTUAL_POINTER_REGISTER
))
5734 && GET_MODE_SIZE (mode
) >= 4
5735 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5736 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5740 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5741 && GET_MODE_SIZE (mode
) == 4
5742 && GET_CODE (x
) == SYMBOL_REF
5743 && CONSTANT_POOL_ADDRESS_P (x
)
5745 && symbol_mentioned_p (get_pool_constant (x
))
5746 && ! pcrel_constant_p (get_pool_constant (x
))))
5752 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5753 instruction of mode MODE. */
/* NOTE(review): this chunk is a corrupted extraction -- the function's return
   type line, braces, 'case' labels and some 'return' lines were dropped and
   original line numbers are fused into the text.  Code below is kept
   byte-identical; only comments are added.  */
5755 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
/* Dispatch on the access size in bytes (case labels dropped by extraction).  */
5757 switch (GET_MODE_SIZE (mode
))
/* Presumably the byte (size 1) case: unsigned 5-bit offset, 0..31.  */
5760 return val
>= 0 && val
< 32;
/* Presumably the halfword (size 2) case: even offset in 0..62.  */
5763 return val
>= 0 && val
< 64 && (val
& 1) == 0;
/* Default case: the whole access must end within 128 bytes of the base;
   surrounding 'val >= 0' / word-alignment tests were dropped by extraction.  */
5767 && (val
+ GET_MODE_SIZE (mode
)) <= 128
/* Dispatcher for the TARGET_LEGITIMATE_ADDRESS_P hook: route address
   validation to the ARM, Thumb-2 or Thumb-1 checker depending on the
   instruction set currently in use.  NOTE(review): the leading
   'if (TARGET_ARM)' line was dropped by the extraction.  */
5773 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
/* ARM state: full addressing-mode checker (SET context).  */
5776 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
5777 else if (TARGET_THUMB2
)
5778 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
5779 else /* if (TARGET_THUMB1) */
5780 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
5783 /* Build the SYMBOL_REF for __tls_get_addr. */
/* GC-rooted cache (GTY) so the SYMBOL_REF survives garbage collection
   and is built only once per compilation.  */
5785 static GTY(()) rtx tls_get_addr_libfunc
;
/* Lazily create and return the cached __tls_get_addr libfunc SYMBOL_REF.  */
5788 get_tls_get_addr (void)
5790 if (!tls_get_addr_libfunc
)
5791 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
5792 return tls_get_addr_libfunc
;
/* Emit code that loads the thread pointer into TARGET (a fresh SImode
   pseudo is allocated when TARGET is NULL).  NOTE(review): the guard
   selecting hardware vs. software thread-pointer access and the final
   'return target;' were dropped by the extraction.  */
5796 arm_load_tp (rtx target
)
5799 target
= gen_reg_rtx (SImode
);
5803 /* Can return in any reg. */
/* Hardware path: read the thread register directly.  */
5804 emit_insn (gen_load_tp_hard (target
));
5808 /* Always returned in r0. Immediately copy the result into a pseudo,
5809 otherwise other uses of r0 (e.g. setting up function arguments) may
5810 clobber the value. */
/* Software path: helper call leaves the thread pointer in r0.  */
5814 emit_insn (gen_load_tp_soft ());
5816 tmp
= gen_rtx_REG (SImode
, 0);
5817 emit_move_insn (target
, tmp
);
/* Wrap X in a CONST and move it into REG (allocating a fresh SImode
   pseudo when REG is NULL); presumably returns REG -- the 'return'
   line was dropped by the extraction.  */
5823 load_tls_operand (rtx x
, rtx reg
)
5827 if (reg
== NULL_RTX
)
5828 reg
= gen_reg_rtx (SImode
);
5830 tmp
= gen_rtx_CONST (SImode
, x
);
5832 emit_move_insn (reg
, tmp
);
/* Emit the instruction sequence for a call to __tls_get_addr for symbol X
   with relocation RELOC (TLS_GD32 / TLS_LDM32).  The call's result rtx is
   stored through VALUEP; the emitted insn sequence is captured with
   get_insns () and presumably returned (the enclosing start_sequence /
   end_sequence / return lines were dropped by the extraction).  */
5838 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
5840 rtx insns
, label
, labelno
, sum
;
/* Build a fresh pic label so the PC-relative offset below is unique.  */
5844 labelno
= GEN_INT (pic_labelno
++);
5845 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5846 label
= gen_rtx_CONST (VOIDmode
, label
);
/* The operand encodes: symbol, relocation kind, pic label, and the
   pipeline PC offset (8 in ARM state, 4 in Thumb).  */
5848 sum
= gen_rtx_UNSPEC (Pmode
,
5849 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
5850 GEN_INT (TARGET_ARM
? 8 : 4)),
5852 reg
= load_tls_operand (sum
, reg
);
/* Add the pic label's address; the ARM-state variant adds dot+8
   (NOTE(review): the 'if (TARGET_ARM)' guard was dropped).  */
5855 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5856 else if (TARGET_THUMB2
)
5857 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
))
;
5858 else /* TARGET_THUMB1 */
5859 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
/* Perform the actual library call; result communicated via *VALUEP.  */
5861 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
, LCT_PURE
, /* LCT_CONST? */
5862 Pmode
, 1, reg
, Pmode
);
5864 insns
= get_insns ();
/* Rewrite a TLS symbol reference X into a legitimate address according to
   the symbol's TLS access model (global-dynamic, local-dynamic,
   initial-exec, local-exec).  NOTE(review): the enclosing switch header,
   braces, 'return dest;' lines and a default/gcc_unreachable tail were
   dropped by the extraction; code lines are kept byte-identical.  */
5871 legitimize_tls_address (rtx x
, rtx reg
)
5873 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
5874 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
/* GD: one __tls_get_addr call per symbol; equivalence is X itself.  */
5878 case TLS_MODEL_GLOBAL_DYNAMIC
:
5879 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
5880 dest
= gen_reg_rtx (Pmode
);
5881 emit_libcall_block (insns
, dest
, ret
, x
);
/* LD: one __tls_get_addr call for the whole module, plus a per-symbol
   addend (TLS_LDO32).  */
5884 case TLS_MODEL_LOCAL_DYNAMIC
:
5885 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
5887 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5888 share the LDM result with other LD model accesses. */
5889 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
5891 dest
= gen_reg_rtx (Pmode
);
5892 emit_libcall_block (insns
, dest
, ret
, eqv
);
5894 /* Load the addend. */
5895 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
, GEN_INT (TLS_LDO32
)),
5897 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
5898 return gen_rtx_PLUS (Pmode
, dest
, addend
);
/* IE: load the symbol's TP offset from the GOT, then add the thread
   pointer.  Uses the same pic-label dot+8/dot+4 scheme as above.  */
5900 case TLS_MODEL_INITIAL_EXEC
:
5901 labelno
= GEN_INT (pic_labelno
++);
5902 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5903 label
= gen_rtx_CONST (VOIDmode
, label
);
5904 sum
= gen_rtx_UNSPEC (Pmode
,
5905 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
5906 GEN_INT (TARGET_ARM
? 8 : 4)),
5908 reg
= load_tls_operand (sum
, reg
);
5911 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
5912 else if (TARGET_THUMB2
)
5913 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
5916 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5917 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
5920 tp
= arm_load_tp (NULL_RTX
);
5922 return gen_rtx_PLUS (Pmode
, tp
, reg
);
/* LE: the offset from the thread pointer is a link-time constant
   (TLS_LE32); just add it to TP.  */
5924 case TLS_MODEL_LOCAL_EXEC
:
5925 tp
= arm_load_tp (NULL_RTX
);
5927 reg
= gen_rtx_UNSPEC (Pmode
,
5928 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
5930 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
5932 return gen_rtx_PLUS (Pmode
, tp
, reg
);
5939 /* Try machine-dependent ways of modifying an illegitimate address
5940 to be legitimate. If we find one, return the new, valid address. */
/* NOTE(review): extraction dropped several guards/returns in this function
   (e.g. the TARGET_THUMB2 early return, the Thumb-1 dispatch guard, the
   final 'return x;').  Code lines are byte-identical; comments only.  */
5942 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
5946 /* TODO: legitimize_address for Thumb2. */
/* Thumb-1 addresses are handled by their own routine.  */
5949 return thumb_legitimize_address (x
, orig_x
, mode
);
/* TLS symbols need model-specific sequences, not plain legitimization.  */
5952 if (arm_tls_symbol_p (x
))
5953 return legitimize_tls_address (x
, NULL_RTX
);
/* base + offset: force non-symbolic constants into registers, then try
   to split an out-of-range constant offset into high and low parts.  */
5955 if (GET_CODE (x
) == PLUS
)
5957 rtx xop0
= XEXP (x
, 0);
5958 rtx xop1
= XEXP (x
, 1);
5960 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
5961 xop0
= force_reg (SImode
, xop0
);
5963 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
5964 xop1
= force_reg (SImode
, xop1
);
5966 if (ARM_BASE_REGISTER_RTX_P (xop0
)
5967 && GET_CODE (xop1
) == CONST_INT
)
5969 HOST_WIDE_INT n
, low_n
;
5973 /* VFP addressing modes actually allow greater offsets, but for
5974 now we just stick with the lowest common denominator. */
5976 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
/* Keep the low 12 bits in the address; sign is preserved for
   negative offsets.  TImode gets no low part at all.  */
5988 low_n
= ((mode
) == TImode
? 0
5989 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
5993 base_reg
= gen_reg_rtx (SImode
);
5994 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
5995 emit_move_insn (base_reg
, val
);
5996 x
= plus_constant (base_reg
, low_n
);
5998 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
5999 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6002 /* XXX We don't allow MINUS any more -- see comment in
6003 arm_legitimate_address_outer_p (). */
6004 else if (GET_CODE (x
) == MINUS
)
6006 rtx xop0
= XEXP (x
, 0);
6007 rtx xop1
= XEXP (x
, 1);
6009 if (CONSTANT_P (xop0
))
6010 xop0
= force_reg (SImode
, xop0
);
6012 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6013 xop1
= force_reg (SImode
, xop1
);
6015 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6016 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6019 /* Make sure to take full advantage of the pre-indexed addressing mode
6020 with absolute addresses which often allows for the base register to
6021 be factorized for multiple adjacent memory references, and it might
6022 even allows for the mini pool to be avoided entirely. */
6023 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
6026 HOST_WIDE_INT mask
, base
, index
;
6029 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6030 use a 8-bit index. So let's use a 12-bit index for SImode only and
6031 hope that arm_gen_constant will enable ldrb to use more bits. */
6032 bits
= (mode
== SImode
) ? 12 : 8;
6033 mask
= (1 << bits
) - 1;
6034 base
= INTVAL (x
) & ~mask
;
6035 index
= INTVAL (x
) & mask
;
/* If the base needs too many set bits to materialize, a base with
   extra bits plus a negative index is usually cheaper.  */
6036 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6038 /* It'll most probably be more efficient to generate the base
6039 with more bits set and use a negative index instead. */
6043 base_reg
= force_reg (SImode
, GEN_INT (base
));
6044 x
= plus_constant (base_reg
, index
);
6049 /* We need to find and carefully transform any SYMBOL and LABEL
6050 references; so go back to the original address expression. */
6051 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6053 if (new_x
!= orig_x
)
6061 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6062 to be legitimate. If we find one, return the new, valid address. */
/* NOTE(review): extraction dropped braces and the final 'return x;';
   code lines below are byte-identical, comments only.  */
6064 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6066 if (arm_tls_symbol_p (x
))
6067 return legitimize_tls_address (x
, NULL_RTX
);
/* base + const where the const is outside the Thumb-1 immediate range
   (negative, or >= 32 * access size).  */
6069 if (GET_CODE (x
) == PLUS
6070 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6071 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6072 || INTVAL (XEXP (x
, 1)) < 0))
6074 rtx xop0
= XEXP (x
, 0);
6075 rtx xop1
= XEXP (x
, 1);
6076 HOST_WIDE_INT offset
= INTVAL (xop1
);
6078 /* Try and fold the offset into a biasing of the base register and
6079 then offsetting that. Don't do this when optimizing for space
6080 since it can cause too many CSEs. */
6081 if (optimize_size
&& offset
>= 0
6082 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6084 HOST_WIDE_INT delta
;
6087 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6088 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6089 delta
= 31 * GET_MODE_SIZE (mode
);
6091 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
/* Bias the base by (offset - delta) and keep delta as the in-range
   immediate offset.  */
6093 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
6095 x
= plus_constant (xop0
, delta
);
6097 else if (offset
< 0 && offset
> -256)
6098 /* Small negative offsets are best done with a subtract before the
6099 dereference, forcing these into a register normally takes two
6101 x
= force_operand (x
, NULL_RTX
);
6104 /* For the remaining cases, force the constant into a register. */
6105 xop1
= force_reg (SImode
, xop1
);
6106 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
/* reg + non-reg: force the non-register operand (operand 0) into a
   register so the sum becomes reg+reg.  */
6109 else if (GET_CODE (x
) == PLUS
6110 && s_register_operand (XEXP (x
, 1), SImode
)
6111 && !s_register_operand (XEXP (x
, 0), SImode
))
6113 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6115 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6120 /* We need to find and carefully transform any SYMBOL and LABEL
6121 references; so go back to the original address expression. */
6122 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6124 if (new_x
!= orig_x
)
/* LEGITIMIZE_RELOAD_ADDRESS worker for ARM/Thumb-2: split an out-of-range
   [reg + const] reload address into [(reg + high) + low], pushing a reload
   for the high part so post-reload CSE can share the biased base.
   NOTE(review): extraction dropped many guard lines (TARGET_*, 'return
   false/true', braces); code lines below are byte-identical.  */
6132 arm_legitimize_reload_address (rtx
*p
,
6133 enum machine_mode mode
,
6134 int opnum
, int type
,
6135 int ind_levels ATTRIBUTE_UNUSED
)
6137 if (GET_CODE (*p
) == PLUS
6138 && GET_CODE (XEXP (*p
, 0)) == REG
6139 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
6140 && GET_CODE (XEXP (*p
, 1)) == CONST_INT
)
6142 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
6143 HOST_WIDE_INT low
, high
;
6145 /* Detect coprocessor load/stores. */
6146 bool coproc_p
= ((TARGET_HARD_FLOAT
6147 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
6148 && (mode
== SFmode
|| mode
== DFmode
6149 || (mode
== DImode
&& TARGET_MAVERICK
)))
6150 || (TARGET_REALLY_IWMMXT
6151 && VALID_IWMMXT_REG_MODE (mode
))
6153 && (VALID_NEON_DREG_MODE (mode
)
6154 || VALID_NEON_QREG_MODE (mode
))))
;
6156 /* For some conditions, bail out when lower two bits are unaligned. */
6157 if ((val
& 0x3) != 0
6158 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6160 /* For DI, and DF under soft-float: */
6161 || ((mode
== DImode
|| mode
== DFmode
)
6162 /* Without ldrd, we use stm/ldm, which does not
6163 fair well with unaligned bits. */
6165 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6166 || TARGET_THUMB2
))))
6169 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6170 of which the (reg+high) gets turned into a reload add insn,
6171 we try to decompose the index into high/low values that can often
6172 also lead to better reload CSE.
6174 ldr r0, [r2, #4100] // Offset too large
6175 ldr r1, [r2, #4104] // Offset too large
6177 is best reloaded as:
6183 which post-reload CSE can simplify in most cases to eliminate the
6184 second add instruction:
6189 The idea here is that we want to split out the bits of the constant
6190 as a mask, rather than as subtracting the maximum offset that the
6191 respective type of load/store used can handle.
6193 When encountering negative offsets, we can still utilize it even if
6194 the overall offset is positive; sometimes this may lead to an immediate
6195 that can be constructed with fewer instructions.
6197 ldr r0, [r2, #0x3FFFFC]
6199 This is best reloaded as:
6200 add t1, r2, #0x400000
6203 The trick for spotting this for a load insn with N bits of offset
6204 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6205 negative offset that is going to make bit N and all the bits below
6206 it become zero in the remainder part.
6208 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6209 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6210 used in most cases of ARM load/store instructions. */
6212 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6213 (((VAL) & ((1 << (N)) - 1)) \
6214 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
/* Coprocessor loads: 8-bit index scaled by 4 => 10 significant bits.  */
6219 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
6221 /* NEON quad-word load/stores are made of two double-word accesses,
6222 so the valid index range is reduced by 8. Treat as 9-bit range if
6224 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
6225 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
/* Doubleword (DI/DF) accesses.  */
6227 else if (GET_MODE_SIZE (mode
) == 8)
6230 low
= (TARGET_THUMB2
6231 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
6232 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
6234 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6235 to access doublewords. The supported load/store offsets are
6236 -8, -4, and 4, which we try to produce here. */
6237 low
= ((val
& 0xf) ^ 0x8) - 0x8;
6239 else if (GET_MODE_SIZE (mode
) < 8)
6241 /* NEON element load/stores do not have an offset. */
6242 if (TARGET_NEON_FP16
&& mode
== HFmode
)
6247 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6248 Try the wider 12-bit range first, and re-try if the result
6250 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6252 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6256 if (mode
== HImode
|| mode
== HFmode
)
6259 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6262 /* The storehi/movhi_bytes fallbacks can use only
6263 [-4094,+4094] of the full ldrb/strb index range. */
6264 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6265 if (low
== 4095 || low
== -4095)
/* Word and byte accesses: full 12-bit ldr/str index.  */
6270 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
/* Compute the high part with wraparound to 32-bit two's complement.  */
6276 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
6277 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
6278 - (unsigned HOST_WIDE_INT
) 0x80000000);
6279 /* Check for overflow or zero */
6280 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
6283 /* Reload the high part into a base reg; leave the low part
6285 *p
= gen_rtx_PLUS (GET_MODE (*p
),
6286 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
6289 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6290 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6291 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
/* Thumb-1 LEGITIMIZE_RELOAD_ADDRESS worker: reload whole problematic
   address expressions (SP + out-of-range offset for narrow modes, or
   hi-reg + hi-reg sums) rather than individual registers, so only one
   reload register is needed.  NOTE(review): extraction dropped the
   'rtx x = *x_p;' / 'orig_x' setup, braces and 'return' lines.  */
6299 thumb_legitimize_reload_address (rtx
*x_p
,
6300 enum machine_mode mode
,
6301 int opnum
, int type
,
6302 int ind_levels ATTRIBUTE_UNUSED
)
/* SP-relative access of a narrow mode with an offset the Thumb-1
   encodings cannot represent: reload the whole sum.  */
6306 if (GET_CODE (x
) == PLUS
6307 && GET_MODE_SIZE (mode
) < 4
6308 && REG_P (XEXP (x
, 0))
6309 && XEXP (x
, 0) == stack_pointer_rtx
6310 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6311 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6316 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6317 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6321 /* If both registers are hi-regs, then it's better to reload the
6322 entire expression rather than each register individually. That
6323 only requires one reload register rather than two. */
6324 if (GET_CODE (x
) == PLUS
6325 && REG_P (XEXP (x
, 0))
6326 && REG_P (XEXP (x
, 1))
6327 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6328 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6333 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6334 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6341 /* Test for various thread-local symbols. */
6343 /* Return TRUE if X is a thread-local symbol. */
/* False whenever the target has no TLS support, or X is not a SYMBOL_REF
   (the early 'return false;' lines were dropped by the extraction).  */
6346 arm_tls_symbol_p (rtx x
)
6348 if (! TARGET_HAVE_TLS
)
6351 if (GET_CODE (x
) != SYMBOL_REF
)
6354 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6357 /* Helper for arm_tls_referenced_p. */
/* for_each_rtx callback: nonzero when *X is a TLS SYMBOL_REF.  Skips
   UNSPEC_TLS subtrees, which carry TLS offsets rather than symbol
   references.  */
6360 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6362 if (GET_CODE (*x
) == SYMBOL_REF
)
6363 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6365 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6366 TLS offsets, not real symbol references. */
6367 if (GET_CODE (*x
) == UNSPEC
6368 && XINT (*x
, 1) == UNSPEC_TLS
)
6374 /* Return TRUE if X contains any TLS symbol references. */
/* Walks the whole rtx with for_each_rtx using arm_tls_operand_p_1;
   short-circuits to false when the target has no TLS support.  */
6377 arm_tls_referenced_p (rtx x
)
6379 if (! TARGET_HAVE_TLS
)
6382 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6385 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6387 On the ARM, allow any integer (invalid ones are removed later by insn
6388 patterns), nice doubles and symbol_refs which refer to the function's
6391 When generating pic allow anything. */
/* ARM-state predicate: reject Neon struct-mode constants; otherwise a
   constant is fine unless it mentions a label while not generating PIC.  */
6394 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
6396 /* At present, we have no support for Neon structure constants, so forbid
6397 them here. It might be possible to handle simple cases like 0 and -1
6399 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
6402 return flag_pic
|| !label_mentioned_p (x
);
/* Thumb predicate for TARGET_LEGITIMATE_CONSTANT_P: integers, doubles
   and constant addresses are acceptable (the tail of the condition was
   dropped by the extraction).  */
6406 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6408 return (GET_CODE (x
) == CONST_INT
6409 || GET_CODE (x
) == CONST_DOUBLE
6410 || CONSTANT_ADDRESS_P (x
)
/* TARGET_LEGITIMATE_CONSTANT_P hook: a constant is legitimate when it
   does not have to be forced to memory, then checked with the ARM or
   Thumb predicate (the TARGET_ARM selector line was dropped by the
   extraction).  */
6415 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6417 return (!arm_cannot_force_const_mem (mode
, x
)
6419 ? arm_legitimate_constant_p_1 (mode
, x
)
6420 : thumb_legitimate_constant_p (mode
, x
)));
6423 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* True when X must NOT be placed in the constant pool: either a
   symbol+offset that escapes its section (when the ABI requires offsets
   to stay within sections) or anything referencing a TLS symbol.  */
6426 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6430 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6432 split_const (x
, &base
, &offset
);
6433 if (GET_CODE (base
) == SYMBOL_REF
6434 && !offset_within_block_p (base
, INTVAL (offset
)))
6437 return arm_tls_referenced_p (x
);
/* True when X is a REG or a SUBREG of a REG (used by the cost code below).  */
6440 #define REG_OR_SUBREG_REG(X) \
6441 (GET_CODE (X) == REG \
6442 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
/* Strip a SUBREG wrapper; yields the underlying REG (or X itself).  */
6444 #define REG_OR_SUBREG_RTX(X) \
6445 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
/* Estimate the speed cost (in COSTS_N_INSNS units) of rtx X for Thumb-1.
   OUTER is the code of the containing expression, used to price
   constants by how the surrounding operation can absorb them.
   NOTE(review): extraction dropped the enclosing switch header, most
   'case' labels and several returns; code lines are byte-identical.  */
6448 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6450 enum machine_mode mode
= GET_MODE (x
);
6464 return COSTS_N_INSNS (1);
/* Multiply: cost scales with operand magnitude when it is a constant
   (dropped loop computing 'cycles' from the constant's bits).  */
6467 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6470 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
6477 return COSTS_N_INSNS (2) + cycles
;
6479 return COSTS_N_INSNS (1) + 16;
/* SET: extra cost for each memory operand on either side.  */
6482 return (COSTS_N_INSNS (1)
6483 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6484 + GET_CODE (SET_DEST (x
)) == MEM
));
/* CONST_INT: cost depends on how the surrounding operation (OUTER)
   can encode the immediate.  */
6489 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6491 if (thumb_shiftable_const (INTVAL (x
)))
6492 return COSTS_N_INSNS (2);
6493 return COSTS_N_INSNS (3);
6495 else if ((outer
== PLUS
|| outer
== COMPARE
)
6496 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6498 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6499 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6500 return COSTS_N_INSNS (1);
6501 else if (outer
== AND
)
6504 /* This duplicates the tests in the andsi3 expander. */
6505 for (i
= 9; i
<= 31; i
++)
6506 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6507 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6508 return COSTS_N_INSNS (2);
6510 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6511 || outer
== LSHIFTRT
)
6513 return COSTS_N_INSNS (2);
6519 return COSTS_N_INSNS (3);
6537 /* XXX another guess. */
6538 /* Memory costs quite a lot for the first word, but subsequent words
6539 load at the equivalent of a single insn each. */
6540 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
6541 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
/* IF_THEN_ELSE with a PC arm is a branch (dropped cost line).  */
6546 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
/* Extensions: DImode results pay one extra insn; recurse on operand.  */
6552 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
6553 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
6559 return total
+ COSTS_N_INSNS (1);
6561 /* Assume a two-shift sequence. Increase the cost slightly so
6562 we prefer actual shifts over an extend operation. */
6563 return total
+ 1 + COSTS_N_INSNS (2);
/* Generic ARM-state RTX cost estimator (worker shared by the per-CPU
   TARGET_RTX_COSTS hooks): stores the cost of X into *TOTAL; SPEED
   selects speed vs. size costing.  NOTE(review): extraction dropped the
   enclosing switch header, most 'case' labels, braces and 'return'
   lines, so control flow below is fragmentary; code lines are kept
   byte-identical and only comments are added.  */
6571 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
6573 enum machine_mode mode
= GET_MODE (x
);
6574 enum rtx_code subcode
;
6576 enum rtx_code code
= GET_CODE (x
);
6582 /* Memory costs quite a lot for the first word, but subsequent words
6583 load at the equivalent of a single insn each. */
6584 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
/* Float division-like ops: cheap only with hardware FP of the right width.  */
6591 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
6592 *total
= COSTS_N_INSNS (2);
6593 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
6594 *total
= COSTS_N_INSNS (4);
6596 *total
= COSTS_N_INSNS (20);
/* Rotate by register: priced as a subtract-from-32 plus the shift.  */
6600 if (GET_CODE (XEXP (x
, 1)) == REG
)
6601 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
6602 else if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6603 *total
= rtx_cost (XEXP (x
, 1), code
, speed
);
/* Fall-through block shared by the shift/rotate cases below.  */
6609 *total
+= COSTS_N_INSNS (4);
6614 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
6615 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6618 *total
+= COSTS_N_INSNS (3);
6622 *total
+= COSTS_N_INSNS (1);
6623 /* Increase the cost of complex shifts because they aren't any faster,
6624 and reduce dual issue opportunities. */
6625 if (arm_tune_cortex_a9
6626 && outer
!= SET
&& GET_CODE (XEXP (x
, 1)) != CONST_INT
)
/* MINUS: one insn per word; immediate operands encodable by the ARM
   immediate scheme avoid an extra constant load.  */
6634 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
6635 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6636 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6638 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6642 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6643 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6645 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6652 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6654 if (TARGET_HARD_FLOAT
6656 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6658 *total
= COSTS_N_INSNS (1);
6659 if (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
6660 && arm_const_double_rtx (XEXP (x
, 0)))
6662 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6666 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6667 && arm_const_double_rtx (XEXP (x
, 1)))
6669 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6675 *total
= COSTS_N_INSNS (20);
6679 *total
= COSTS_N_INSNS (1);
6680 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6681 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6683 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
/* A shifted operand folds into the data-processing insn for free.  */
6687 subcode
= GET_CODE (XEXP (x
, 1));
6688 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6689 || subcode
== LSHIFTRT
6690 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6692 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6693 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6697 /* A shift as a part of RSB costs no more than RSB itself. */
6698 if (GET_CODE (XEXP (x
, 0)) == MULT
6699 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6701 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, speed
);
6702 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6707 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
6709 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6710 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
/* MINUS of a comparison result: extra insn unless the flags register
   already holds the condition.  */
6714 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
6715 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
6717 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6718 if (GET_CODE (XEXP (XEXP (x
, 1), 0)) == REG
6719 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
6720 *total
+= COSTS_N_INSNS (1);
/* PLUS with extend folds into a single (u|s)xtab-style insn on v6.  */
6728 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
6729 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
6730 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
6732 *total
= COSTS_N_INSNS (1);
6733 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
6735 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6739 /* MLA: All arguments must be registers. We filter out
6740 multiplication by a power of two, so that we fall down into
6742 if (GET_CODE (XEXP (x
, 0)) == MULT
6743 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6745 /* The cost comes from the cost of the multiply. */
6749 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6751 if (TARGET_HARD_FLOAT
6753 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6755 *total
= COSTS_N_INSNS (1);
6756 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6757 && arm_const_double_rtx (XEXP (x
, 1)))
6759 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6766 *total
= COSTS_N_INSNS (20);
6770 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
6771 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
6773 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, speed
);
6774 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
6775 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
6776 *total
+= COSTS_N_INSNS (1);
6782 case AND
: case XOR
: case IOR
:
6784 /* Normally the frame registers will be spilt into reg+const during
6785 reload, so it is a bad idea to combine them with other instructions,
6786 since then they might not be moved outside of loops. As a compromise
6787 we allow integration with ops that have a constant as their second
6789 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
6790 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
6791 && GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6792 *total
= COSTS_N_INSNS (1);
6796 *total
+= COSTS_N_INSNS (2);
6797 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6798 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6800 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6807 *total
+= COSTS_N_INSNS (1);
6808 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6809 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6811 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6814 subcode
= GET_CODE (XEXP (x
, 0));
6815 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6816 || subcode
== LSHIFTRT
6817 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6819 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6820 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6825 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6827 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6828 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6832 if (subcode
== UMIN
|| subcode
== UMAX
6833 || subcode
== SMIN
|| subcode
== SMAX
)
6835 *total
= COSTS_N_INSNS (3);
6842 /* This should have been handled by the CPU specific routines. */
/* Recognize the smull/umull high-part pattern: a 64-bit widening
   multiply of two same-kind extends, shifted right.  */
6846 if (arm_arch3m
&& mode
== SImode
6847 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
6848 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6849 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
6850 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
6851 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
6852 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
6854 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, speed
);
6857 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6861 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6863 if (TARGET_HARD_FLOAT
6865 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6867 *total
= COSTS_N_INSNS (1);
6870 *total
= COSTS_N_INSNS (2);
/* NOT/NEG-style unary ops: one insn per word; a shifted operand folds in.  */
6876 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
6877 if (mode
== SImode
&& code
== NOT
)
6879 subcode
= GET_CODE (XEXP (x
, 0));
6880 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6881 || subcode
== LSHIFTRT
6882 || subcode
== ROTATE
|| subcode
== ROTATERT
6884 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
6886 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6887 /* Register shifts cost an extra cycle. */
6888 if (GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
6889 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
/* IF_THEN_ELSE: a PC arm means branch; otherwise conditional insns.  */
6898 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6900 *total
= COSTS_N_INSNS (4);
6904 operand
= XEXP (x
, 0);
6906 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
6907 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
6908 && GET_CODE (XEXP (operand
, 0)) == REG
6909 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
6910 *total
+= COSTS_N_INSNS (1);
6911 *total
+= (rtx_cost (XEXP (x
, 1), code
, speed
)
6912 + rtx_cost (XEXP (x
, 2), code
, speed
));
6916 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6918 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6924 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6925 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6927 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6933 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6934 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6936 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6956 /* SCC insns. In the case where the comparison has already been
6957 performed, then they cost 2 instructions. Otherwise they need
6958 an additional comparison before them. */
6959 *total
= COSTS_N_INSNS (2);
6960 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
6967 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
6973 *total
+= COSTS_N_INSNS (1);
6974 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6975 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6977 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6981 subcode
= GET_CODE (XEXP (x
, 0));
6982 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6983 || subcode
== LSHIFTRT
6984 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6986 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6987 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6992 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6994 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6995 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
/* Min/max style ops: comparison plus conditional move, plus any
   non-encodable immediate.  */
7005 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
7006 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
7007 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7008 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7012 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7014 if (TARGET_HARD_FLOAT
7016 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7018 *total
= COSTS_N_INSNS (1);
7021 *total
= COSTS_N_INSNS (20);
7024 *total
= COSTS_N_INSNS (1);
7026 *total
+= COSTS_N_INSNS (3);
/* Extensions from sub-word integer modes.  */
7032 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7034 rtx op
= XEXP (x
, 0);
7035 enum machine_mode opmode
= GET_MODE (op
);
7038 *total
+= COSTS_N_INSNS (1);
7040 if (opmode
!= SImode
)
7044 /* If !arm_arch4, we use one of the extendhisi2_mem
7045 or movhi_bytes patterns for HImode. For a QImode
7046 sign extension, we first zero-extend from memory
7047 and then perform a shift sequence. */
7048 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
7049 *total
+= COSTS_N_INSNS (2);
7052 *total
+= COSTS_N_INSNS (1);
7054 /* We don't have the necessary insn, so we need to perform some
7056 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
7057 /* An and with constant 255. */
7058 *total
+= COSTS_N_INSNS (1);
7060 /* A shift sequence. Increase costs slightly to avoid
7061 combining two shifts into an extend operation. */
7062 *total
+= COSTS_N_INSNS (2) + 1;
7068 switch (GET_MODE (XEXP (x
, 0)))
7075 *total
= COSTS_N_INSNS (1);
7085 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
/* CONST_INT: one insn if encodable (directly or negated); otherwise ask
   arm_gen_constant how many insns the synthesis sequence takes.  */
7089 if (const_ok_for_arm (INTVAL (x
))
7090 || const_ok_for_arm (~INTVAL (x
)))
7091 *total
= COSTS_N_INSNS (1);
7093 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
7094 INTVAL (x
), NULL_RTX
,
7101 *total
= COSTS_N_INSNS (3);
7105 *total
= COSTS_N_INSNS (1);
7109 *total
= COSTS_N_INSNS (1);
7110 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
/* CONST_DOUBLE: cheap only when VFPv3 can materialize it immediately.  */
7114 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
7115 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7116 *total
= COSTS_N_INSNS (1);
7118 *total
= COSTS_N_INSNS (4);
7122 *total
= COSTS_N_INSNS (4);
/* NOTE(review): this region is a damaged extraction -- original source
   lines (braces, `case' labels, some statements) are missing between the
   embedded line numbers.  Code tokens are preserved verbatim below.
   Purpose (per the surviving comment): size-oriented RTX cost estimate
   for Thumb-1, derived from thumb1_rtx_costs.  X is the expression,
   CODE its rtx code, OUTER the code of the containing expression.  */
7127 /* Estimates the size cost of thumb1 instructions.
7128 For now most of the code is copied from thumb1_rtx_costs. We need more
7129 fine grain tuning when we have more related test cases. */
7131 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7133 enum machine_mode mode
= GET_MODE (x
);
7146 return COSTS_N_INSNS (1);
7149 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
/* A Thumb-1 mul cannot take an immediate operand; cost the extra
   constant load by recursing on the constant itself.  */
7151 /* Thumb1 mul instruction can't operate on const. We must load it
7152 into a register first. */
7153 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
)
;
7154 return COSTS_N_INSNS (1) + const_size
;
7156 return COSTS_N_INSNS (1);
/* SET: extra cost when either side of the assignment is a memory ref.  */
7159 return (COSTS_N_INSNS (1)
7160 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
7161 + GET_CODE (SET_DEST (x
)) == MEM
));
/* CONST_INT: cheap if it fits a Thumb-1 move/negate/shifted form,
   otherwise assume a multi-insn synthesis.  */
7166 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7167 return COSTS_N_INSNS (1);
7168 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7169 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
7170 return COSTS_N_INSNS (2);
7171 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7172 if (thumb_shiftable_const (INTVAL (x
)))
7173 return COSTS_N_INSNS (2);
7174 return COSTS_N_INSNS (3);
7176 else if ((outer
== PLUS
|| outer
== COMPARE
)
7177 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7179 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7180 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7181 return COSTS_N_INSNS (1);
7182 else if (outer
== AND
)
7185 /* This duplicates the tests in the andsi3 expander. */
7186 for (i
= 9; i
<= 31; i
++)
/* Masks of the form 2^i - 1 (or their complement) can be done with
   a two-insn shift pair instead of loading the constant.  */
7187 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7188 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7189 return COSTS_N_INSNS (2);
7191 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7192 || outer
== LSHIFTRT
)
7194 return COSTS_N_INSNS (2);
7200 return COSTS_N_INSNS (3);
7218 /* XXX another guess. */
7219 /* Memory costs quite a lot for the first word, but subsequent words
7220 load at the equivalent of a single insn each. */
7221 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7222 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7227 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
/* Extension costs: keyed off the mode of the operand being extended;
   DImode results and memory operands cost extra.  */
7232 /* XXX still guessing. */
7233 switch (GET_MODE (XEXP (x
, 0)))
7236 return (1 + (mode
== DImode
? 4 : 0)
7237 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7240 return (4 + (mode
== DImode
? 4 : 0)
7241 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7244 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
/* NOTE(review): damaged extraction -- original lines are missing between
   the embedded line numbers; code tokens preserved verbatim.
   Purpose: RTX cost function used when optimizing for size (-Os) on
   32-bit ARM/Thumb-2; delegates to thumb1_size_rtx_costs for Thumb-1
   and to arm_rtx_costs_1 for codes it does not model itself.  */
7255 /* RTX costs when optimizing for size. */
7257 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7260 enum machine_mode mode
= GET_MODE (x
);
7263 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7267 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7271 /* A memory access costs 1 insn if the mode is small, or the address is
7272 a single register, otherwise it costs one insn per word. */
7273 if (REG_P (XEXP (x
, 0)))
7274 *total
= COSTS_N_INSNS (1);
7276 && GET_CODE (XEXP (x
, 0)) == PLUS
7277 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
7278 /* This will be split into two instructions.
7279 See arm.md:calculate_pic_address. */
7280 *total
= COSTS_N_INSNS (2);
7282 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* Division/modulus: no hardware divide assumed here.  */
7289 /* Needs a libcall, so it costs about this. */
7290 *total
= COSTS_N_INSNS (2);
/* Shifts and rotates: register-amount SImode shifts cost more; DImode
   constant shifts are a short synthesized sequence.  */
7294 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
7296 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, false);
7304 if (mode
== DImode
&& GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7306 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, false);
7309 else if (mode
== SImode
)
7311 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, false);
7312 /* Slightly disparage register shifts, but not by much. */
7313 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
7314 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, false);
7318 /* Needs a libcall. */
7319 *total
= COSTS_N_INSNS (2);
/* MINUS: single insn for HW-float-capable float modes; shifted operands
   fold into the ALU op for free on ARM.  */
7323 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7324 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7326 *total
= COSTS_N_INSNS (1);
7332 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7333 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7335 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7336 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7337 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7338 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7339 || subcode1
== ASHIFTRT
)
7341 /* It's just the cost of the two operands. */
7346 *total
= COSTS_N_INSNS (1);
7350 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* PLUS: same float-mode treatment; a power-of-two MULT operand is a
   shift folded into the add.  */
7354 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7355 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7357 *total
= COSTS_N_INSNS (1);
7361 /* A shift as a part of ADD costs nothing. */
7362 if (GET_CODE (XEXP (x
, 0)) == MULT
7363 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7365 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7366 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, false);
7367 *total
+= rtx_cost (XEXP (x
, 1), code
, false);
7372 case AND
: case XOR
: case IOR
:
7375 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7377 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7378 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7379 || (code
== AND
&& subcode
== NOT
))
7381 /* It's just the cost of the two operands. */
7387 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7391 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7395 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7396 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7398 *total
= COSTS_N_INSNS (1);
7404 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
/* Comparisons against an existing CC register are free.  */
7413 if (cc_register (XEXP (x
, 0), VOIDmode
))
7416 *total
= COSTS_N_INSNS (1);
7420 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7421 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7422 *total
= COSTS_N_INSNS (1);
7424 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
7429 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
/* CONST_INT: cost depends on whether the value (or a simple transform
   of it) fits an ARM immediate, given the surrounding operation.  */
7432 if (const_ok_for_arm (INTVAL (x
)))
7433 /* A multiplication by a constant requires another instruction
7434 to load the constant to a register. */
7435 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
7437 else if (const_ok_for_arm (~INTVAL (x
)))
7438 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
7439 else if (const_ok_for_arm (-INTVAL (x
)))
7441 if (outer_code
== COMPARE
|| outer_code
== PLUS
7442 || outer_code
== MINUS
)
7445 *total
= COSTS_N_INSNS (1);
7448 *total
= COSTS_N_INSNS (2);
7454 *total
= COSTS_N_INSNS (2);
7458 *total
= COSTS_N_INSNS (4);
7463 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7464 cost of these slightly. */
7465 *total
= COSTS_N_INSNS (1) + 1;
7469 if (mode
!= VOIDmode
)
7470 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7472 *total
= COSTS_N_INSNS (4); /* Who knows? */
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  This is the TARGET_RTX_COSTS hook entry point:
   it routes to the size-cost function when not optimizing for speed,
   and otherwise to the per-core tuned rtx_costs callback in
   current_tune.  (The original header comment was a stale copy of the
   one above arm_size_rtx_costs.)  */
7477 /* Dispatch RTX costs: size costs when optimizing for size, else the
   per-core tuned cost function. */
7479 arm_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
7483 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
7484 (enum rtx_code
) outer_code
, total
);
7486 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
7487 (enum rtx_code
) outer_code
,
/* NOTE(review): damaged extraction -- original lines missing between the
   embedded line numbers; apart from the fix noted below, code tokens are
   preserved verbatim.
   Purpose: RTX costs for cores with a slow (iterative Booth) multiplier.
   MULT by a constant is costed by simulating 2-bit Booth retirement
   steps; other codes fall through to arm_rtx_costs_1.
   Fix: removed a stray empty statement (double semicolon) after the
   final return.  */
7491 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7492 supported on any "slowmul" cores, so it can be ignored. */
7495 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7496 int *total
, bool speed
)
7498 enum machine_mode mode
= GET_MODE (x
);
7502 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7509 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
7512 *total
= COSTS_N_INSNS (20);
7516 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7518 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7519 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7520 int cost
, const_ok
= const_ok_for_arm (i
);
7521 int j
, booth_unit_size
;
/* Base cost; an immediate that is not a valid ARM constant also
   needs a load.  Each loop iteration models one Booth step that
   retires BOOTH_UNIT_SIZE bits of the multiplier.  */
7523 /* Tune as appropriate. */
7524 cost
= const_ok
? 4 : 8;
7525 booth_unit_size
= 2;
7526 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7528 i
>>= booth_unit_size
;
7532 *total
= COSTS_N_INSNS (cost
);
7533 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7537 *total
= COSTS_N_INSNS (20);
7541 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: RTX costs for cores with a fast multiply
   unit ("M" variants).  Widening multiplies of matching extends are
   cheap; constant multipliers are costed via 8-bit Booth steps.  */
7546 /* RTX cost for cores with a fast multiply unit (M variants). */
7549 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7550 int *total
, bool speed
)
7552 enum machine_mode mode
= GET_MODE (x
);
7556 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7560 /* ??? should thumb2 use different costs? */
/* A widening multiply (both operands the same kind of extend) maps to
   a single smull/umull-class instruction.  */
7564 /* There is no point basing this on the tuning, since it is always the
7565 fast variant if it exists at all. */
7567 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7568 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7569 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7571 *total
= COSTS_N_INSNS(2);
7578 *total
= COSTS_N_INSNS (5);
7582 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7584 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7585 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7586 int cost
, const_ok
= const_ok_for_arm (i
);
7587 int j
, booth_unit_size
;
/* Fast multiplier retires 8 bits per step (vs 2 on slowmul).  */
7589 /* Tune as appropriate. */
7590 cost
= const_ok
? 4 : 8;
7591 booth_unit_size
= 8;
7592 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7594 i
>>= booth_unit_size
;
7598 *total
= COSTS_N_INSNS(cost
);
7604 *total
= COSTS_N_INSNS (4);
7608 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7610 if (TARGET_HARD_FLOAT
7612 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7614 *total
= COSTS_N_INSNS (1);
7619 /* Requires a lib call */
7620 *total
= COSTS_N_INSNS (20);
7624 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: RTX costs tuned for XScale.  Notable:
   COMPARE of a MULT is penalized (muls stalls the pipeline), and a
   constant multiplier is costed by how many bits the 15+12-bit-per-cycle
   multiplier must retire.  */
7629 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7630 so it can be ignored. */
7633 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7634 int *total
, bool speed
)
7636 enum machine_mode mode
= GET_MODE (x
);
7640 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7647 if (GET_CODE (XEXP (x
, 0)) != MULT
)
7648 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7650 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7651 will stall until the multiplication is complete. */
7652 *total
= COSTS_N_INSNS (3);
7656 /* There is no point basing this on the tuning, since it is always the
7657 fast variant if it exists at all. */
7659 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7660 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7661 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7663 *total
= COSTS_N_INSNS (2);
7670 *total
= COSTS_N_INSNS (5);
7674 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7676 /* If operand 1 is a constant we can more accurately
7677 calculate the cost of the multiply. The multiplier can
7678 retire 15 bits on the first cycle and a further 12 on the
7679 second. We do, of course, have to load the constant into
7680 a register first. */
7681 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7682 /* There's a general overhead of one cycle. */
7684 unsigned HOST_WIDE_INT masked_const
;
7689 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
/* Bits above 15 need a second cycle; bits above 27 a third
   (masks per the comment above).  */
7691 masked_const
= i
& 0xffff8000;
7692 if (masked_const
!= 0)
7695 masked_const
= i
& 0xf8000000;
7696 if (masked_const
!= 0)
7699 *total
= COSTS_N_INSNS (cost
);
7705 *total
= COSTS_N_INSNS (3);
7709 /* Requires a lib call */
7710 *total
= COSTS_N_INSNS (20);
7714 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: RTX costs for ARM9E and later cores;
   widening multiplies cost 2 insns, plain multiplies 2, DImode 5.  */
7719 /* RTX costs for 9e (and later) cores. */
7722 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7723 int *total
, bool speed
)
7725 enum machine_mode mode
= GET_MODE (x
);
7732 *total
= COSTS_N_INSNS (3);
7736 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7744 /* There is no point basing this on the tuning, since it is always the
7745 fast variant if it exists at all. */
7747 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7748 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7749 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7751 *total
= COSTS_N_INSNS (2);
7758 *total
= COSTS_N_INSNS (5);
7764 *total
= COSTS_N_INSNS (2);
7768 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7770 if (TARGET_HARD_FLOAT
7772 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7774 *total
= COSTS_N_INSNS (1);
7779 *total
= COSTS_N_INSNS (20);
7783 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: relative address-cost ranking for ARM
   state, per the preference order stated in the original comment.  */
7786 /* All address computations that can be done are free, but rtx cost returns
7787 the same for practically all of them. So we weight the different types
7788 of address here in the order (most pref first):
7789 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7791 arm_arm_address_cost (rtx x
)
7793 enum rtx_code c
= GET_CODE (x
);
7795 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
7797 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
7802 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7805 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: address cost for Thumb state; only the
   reg and reg+const-offset forms are examined here.  */
7815 arm_thumb_address_cost (rtx x
)
7817 enum rtx_code c
= GET_CODE (x
);
7822 && GET_CODE (XEXP (x
, 0)) == REG
7823 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   TARGET_ADDRESS_COST hook: dispatch to the ARM-state or Thumb-state
   address-cost function.  SPEED is unused.  */
7830 arm_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
7832 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: per-core scheduler cost adjustment for
   XScale; charges an extra stall when a shifted input operand of INSN
   is produced by another ALU-shift instruction (DEP).  */
7835 /* Adjust cost hook for XScale. */
7837 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7839 /* Some true dependencies can have a higher cost depending
7840 on precisely how certain input operands are used. */
7841 if (REG_NOTE_KIND(link
) == 0
7842 && recog_memoized (insn
) >= 0
7843 && recog_memoized (dep
) >= 0)
7845 int shift_opnum
= get_attr_shift (insn
);
7846 enum attr_type attr_type
= get_attr_type (dep
);
7848 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7849 operand for INSN. If we have a shifted input operand and the
7850 instruction we depend on is another ALU instruction, then we may
7851 have to account for an additional stall. */
7852 if (shift_opnum
!= 0
7853 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
7855 rtx shifted_operand
;
7858 /* Get the shifted operand. */
7859 extract_insn (insn
);
7860 shifted_operand
= recog_data
.operand
[shift_opnum
];
7862 /* Iterate over all the operands in DEP. If we write an operand
7863 that overlaps with SHIFTED_OPERAND, then we have to increase the
7864 cost of this dependency. */
7866 preprocess_constraints ();
7867 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
7869 /* We can ignore strict inputs. */
7870 if (recog_data
.operand_type
[opno
] == OP_IN
)
7873 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: Cortex-A9 scheduler cost adjustment for
   FP output dependencies; FMACS/FMACD output deps are cheaper (latency
   minus 3), other overlapping FP SETs cost one extra cycle.  */
7885 /* Adjust cost hook for Cortex A9. */
7887 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7889 switch (REG_NOTE_KIND (link
))
7896 case REG_DEP_OUTPUT
:
7897 if (recog_memoized (insn
) >= 0
7898 && recog_memoized (dep
) >= 0)
7900 if (GET_CODE (PATTERN (insn
)) == SET
)
7903 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
7905 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
7907 enum attr_type attr_type_insn
= get_attr_type (insn
);
7908 enum attr_type attr_type_dep
= get_attr_type (dep
);
7910 /* By default all dependencies of the form
7913 have an extra latency of 1 cycle because
7914 of the input and output dependency in this
7915 case. However this gets modeled as a true
7916 dependency and hence all these checks. */
7917 if (REG_P (SET_DEST (PATTERN (insn
)))
7918 && REG_P (SET_DEST (PATTERN (dep
)))
7919 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
7920 SET_DEST (PATTERN (dep
))))
7922 /* FMACS is a special case where the dependent
7923 instruction can be issued 3 cycles before
7924 the normal latency in case of an output
7926 if ((attr_type_insn
== TYPE_FMACS
7927 || attr_type_insn
== TYPE_FMACD
)
7928 && (attr_type_dep
== TYPE_FMACS
7929 || attr_type_dep
== TYPE_FMACD
))
7931 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7932 *cost
= insn_default_latency (dep
) - 3;
7934 *cost
= insn_default_latency (dep
);
7939 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7940 *cost
= insn_default_latency (dep
) + 1;
7942 *cost
= insn_default_latency (dep
);
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: FA726TE scheduler cost adjustment; a
   true dependency on CPSR (flag-setting DEP feeding a flag-using INSN)
   carries a 3-cycle penalty.  */
7958 /* Adjust cost hook for FA726TE. */
7960 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7962 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
7963 have penalty of 3. */
7964 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
7965 && recog_memoized (insn
) >= 0
7966 && recog_memoized (dep
) >= 0
7967 && get_attr_conds (dep
) == CONDS_SET
)
7969 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
7970 if (get_attr_conds (insn
) == CONDS_USE
7971 && get_attr_type (insn
) != TYPE_BRANCH
)
7977 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
7978 || get_attr_conds (insn
) == CONDS_USE
)
/* NOTE(review): damaged extraction -- original lines missing; tokens
   preserved verbatim.  Purpose: the generic TARGET_SCHED_ADJUST_COST
   hook; handles Thumb-1 cbranch pairing, delegates to the per-core
   sched_adjust_cost callback, then applies common load-after-store
   heuristics.  */
7988 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7989 It corrects the value of COST based on the relationship between
7990 INSN and DEP through the dependence LINK. It returns the new
7991 value. There is a per-core adjust_cost hook to adjust scheduler costs
7992 and the per-core hook can choose to completely override the generic
7993 adjust_cost function. Only put bits of code into arm_adjust_cost that
7994 are common across all cores. */
7996 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
8000 /* When generating Thumb-1 code, we want to place flag-setting operations
8001 close to a conditional branch which depends on them, so that we can
8002 omit the comparison. */
8004 && REG_NOTE_KIND (link
) == 0
8005 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
8006 && recog_memoized (dep
) >= 0
8007 && get_attr_conds (dep
) == CONDS_SET
)
/* A per-core hook returning false means it fully handled the cost.  */
8010 if (current_tune
->sched_adjust_cost
!= NULL
)
8012 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
8016 /* XXX This is not strictly true for the FPA. */
8017 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8018 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8021 /* Call insns don't incur a stall, even if they follow a load. */
8022 if (REG_NOTE_KIND (link
) == 0
8023 && GET_CODE (insn
) == CALL_INSN
)
8026 if ((i_pat
= single_set (insn
)) != NULL
8027 && GET_CODE (SET_SRC (i_pat
)) == MEM
8028 && (d_pat
= single_set (dep
)) != NULL
8029 && GET_CODE (SET_DEST (d_pat
)) == MEM
)
8031 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
8032 /* This is a load after a store, there is no conflict if the load reads
8033 from a cached area. Assume that loads from the stack, and from the
8034 constant pool are cached, and that others will miss. This is a
8037 if ((GET_CODE (src_mem
) == SYMBOL_REF
8038 && CONSTANT_POOL_ADDRESS_P (src_mem
))
8039 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
8040 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
8041 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
/* NOTE(review): damaged extraction -- original lines (including the
   TARGET_32BIT test the two returns presumably hang off) are missing;
   tokens preserved verbatim.  Default branch-cost hook.  */
8049 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
8052 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
8054 return (optimize
> 0) ? 2 : 0;
/* NOTE(review): damaged extraction -- array initializer lines are
   partially missing; tokens preserved verbatim.  File-scope tables for
   the classic FP immediate constants: fp_consts_inited doubles as the
   "initialized" flag and the number of valid entries (1 for VFP, 8 for
   FPA -- see init_fp_table below).  */
8057 static int fp_consts_inited
= 0;
8059 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8060 static const char * const strings_fp
[8] =
8063 "4", "5", "0.5", "10"
8066 static REAL_VALUE_TYPE values_fp
[8];
/* NOTE(review): damaged extraction -- the condition selecting 1 vs 8 is
   missing; tokens preserved verbatim.  Parses strings_fp[] into
   values_fp[] and records how many entries are valid in
   fp_consts_inited (presumably 1 for VFP, 8 for FPA -- confirm against
   upstream).  */
8069 init_fp_table (void)
8075 fp_consts_inited
= 1;
8077 fp_consts_inited
= 8;
8079 for (i
= 0; i
< fp_consts_inited
; i
++)
8081 r
= REAL_VALUE_ATOF (strings_fp
[i
], DFmode
);
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Tests whether CONST_DOUBLE X equals one of the values_fp[] immediates
   (rejecting minus zero); lazily initializes the table.  */
8086 /* Return TRUE if rtx X is a valid immediate FP constant. */
8088 arm_const_double_rtx (rtx x
)
8093 if (!fp_consts_inited
)
8096 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8097 if (REAL_VALUE_MINUS_ZERO (r
))
8100 for (i
= 0; i
< fp_consts_inited
; i
++)
8101 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Like arm_const_double_rtx but tests the NEGATION of X against all 8
   FPA immediates (note: loop bound is the constant 8 here, not
   fp_consts_inited, since this is FPA-only).  */
8107 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8109 neg_const_double_rtx_ok_for_fpa (rtx x
)
8114 if (!fp_consts_inited
)
8117 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8118 r
= real_value_negate (&r
);
8119 if (REAL_VALUE_MINUS_ZERO (r
))
8122 for (i
= 0; i
< 8; i
++)
8123 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
/* NOTE(review): damaged extraction -- original lines (including the
   formula line of the header comment and several statements) are
   missing; tokens preserved verbatim.  Purpose: map a CONST_DOUBLE onto
   the 8-bit fconst[sd] immediate encoding described below, or -1.  */
8130 /* VFPv3 has a fairly wide range of representable immediates, formed from
8131 "quarter-precision" floating-point values. These can be evaluated using this
8132 formula (with ^ for exponentiation):
8136 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8137 16 <= n <= 31 and 0 <= r <= 7.
8139 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8141 - A (most-significant) is the sign bit.
8142 - BCD are the exponent (encoded as r XOR 3).
8143 - EFGH are the mantissa (encoded as n - 16).
8146 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8147 fconst[sd] instruction, or -1 if X isn't suitable. */
8149 vfp3_const_double_index (rtx x
)
8151 REAL_VALUE_TYPE r
, m
;
8153 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
8154 unsigned HOST_WIDE_INT mask
;
8155 HOST_WIDE_INT m1
, m2
;
8156 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8158 if (!TARGET_VFP3
|| GET_CODE (x
) != CONST_DOUBLE
)
8161 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8163 /* We can't represent these things, so detect them first. */
8164 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
8167 /* Extract sign, exponent and mantissa. */
8168 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
8169 r
= real_value_abs (&r
);
8170 exponent
= REAL_EXP (&r
);
8171 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8172 highest (sign) bit, with a fixed binary point at bit point_pos.
8173 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8174 bits for the mantissa, this may fail (low bits would be lost). */
8175 real_ldexp (&m
, &r
, point_pos
- exponent
);
8176 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8180 /* If there are bits set in the low part of the mantissa, we can't
8181 represent this value. */
8185 /* Now make it so that mantissa contains the most-significant bits, and move
8186 the point_pos to indicate that the least-significant bits have been
8188 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8191 /* We can permit four significant bits of mantissa only, plus a high bit
8192 which is always 1. */
8193 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8194 if ((mantissa
& mask
) != 0)
8197 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8198 mantissa
>>= point_pos
- 5;
8200 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8201 floating-point immediate zero with Neon using an integer-zero load, but
8202 that case is handled elsewhere.) */
8206 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
8208 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8209 normalized significands are in the range [1, 2). (Our mantissa is shifted
8210 left 4 places at this point relative to normalized IEEE754 values). GCC
8211 internally uses [0.5, 1) (see real.c), so the exponent returned from
8212 REAL_EXP must be altered. */
8213 exponent
= 5 - exponent
;
8215 if (exponent
< 0 || exponent
> 7)
8218 /* Sign, mantissa and exponent are now in the correct form to plug into the
8219 formula described in the comment above. */
8220 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Thin predicate wrapper over vfp3_const_double_index.  */
8223 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8225 vfp3_const_double_rtx (rtx x
)
8230 return vfp3_const_double_index (x
) != -1;
/* NOTE(review): damaged extraction -- many original lines (the CHECK
   macro's control flow, several statements) are missing between the
   embedded line numbers; tokens preserved verbatim.  Purpose: classify
   a CONST_VECTOR against the VMOV/VMVN immediate variant table below,
   returning the variant index (or, presumably via missing lines, -1)
   and writing *MODCONST / *ELEMENTWIDTH.  */
8233 /* Recognize immediates which can be used in various Neon instructions. Legal
8234 immediates are described by the following table (for VMVN variants, the
8235 bitwise inverse of the constant shown is recognized. In either case, VMOV
8236 is output and the correct instruction to use for a given constant is chosen
8237 by the assembler). The constant shown is replicated across all elements of
8238 the destination vector.
8240 insn elems variant constant (binary)
8241 ---- ----- ------- -----------------
8242 vmov i32 0 00000000 00000000 00000000 abcdefgh
8243 vmov i32 1 00000000 00000000 abcdefgh 00000000
8244 vmov i32 2 00000000 abcdefgh 00000000 00000000
8245 vmov i32 3 abcdefgh 00000000 00000000 00000000
8246 vmov i16 4 00000000 abcdefgh
8247 vmov i16 5 abcdefgh 00000000
8248 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8249 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8250 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8251 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8252 vmvn i16 10 00000000 abcdefgh
8253 vmvn i16 11 abcdefgh 00000000
8254 vmov i32 12 00000000 00000000 abcdefgh 11111111
8255 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8256 vmov i32 14 00000000 abcdefgh 11111111 11111111
8257 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8259 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8260 eeeeeeee ffffffff gggggggg hhhhhhhh
8261 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8263 For case 18, B = !b. Representable values are exactly those accepted by
8264 vfp3_const_double_index, but are output as floating-point numbers rather
8267 Variants 0-5 (inclusive) may also be used as immediates for the second
8268 operand of VORR/VBIC instructions.
8270 The INVERSE argument causes the bitwise inverse of the given operand to be
8271 recognized instead (used for recognizing legal immediates for the VAND/VORN
8272 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8273 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8274 output, rather than the real insns vbic/vorr).
8276 INVERSE makes no difference to the recognition of float vectors.
8278 The return value is the variant of immediate as shown in the above table, or
8279 -1 if the given value doesn't match any of the listed patterns.
8282 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
8283 rtx
*modconst
, int *elementwidth
)
/* CHECK tests every STRIDE-th byte of the splatted constant against
   TEST; on a full match it records the variant CLASS and element size
   ELSIZE (the macro's surrounding control flow is missing here).  */
8285 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8287 for (i = 0; i < idx; i += (STRIDE)) \
8292 immtype = (CLASS); \
8293 elsize = (ELSIZE); \
8297 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8298 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8299 unsigned char bytes
[16];
8300 int immtype
= -1, matches
;
8301 unsigned int invmask
= inverse
? 0xff : 0;
8303 /* Vectors of float constants. */
8304 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8306 rtx el0
= CONST_VECTOR_ELT (op
, 0);
8309 if (!vfp3_const_double_rtx (el0
))
8312 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
/* All elements must equal element 0 for the f32 splat form.  */
8314 for (i
= 1; i
< n_elts
; i
++)
8316 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8319 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8321 if (!REAL_VALUES_EQUAL (r0
, re
))
8326 *modconst
= CONST_VECTOR_ELT (op
, 0);
8334 /* Splat vector constant out into a byte vector. */
8335 for (i
= 0; i
< n_elts
; i
++)
8337 rtx el
= CONST_VECTOR_ELT (op
, i
);
8338 unsigned HOST_WIDE_INT elpart
;
8339 unsigned int part
, parts
;
8341 if (GET_CODE (el
) == CONST_INT
)
8343 elpart
= INTVAL (el
);
8346 else if (GET_CODE (el
) == CONST_DOUBLE
)
8348 elpart
= CONST_DOUBLE_LOW (el
);
8354 for (part
= 0; part
< parts
; part
++)
8357 for (byte
= 0; byte
< innersize
; byte
++)
8359 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8360 elpart
>>= BITS_PER_UNIT
;
8362 if (GET_CODE (el
) == CONST_DOUBLE
)
8363 elpart
= CONST_DOUBLE_HIGH (el
);
8368 gcc_assert (idx
== GET_MODE_SIZE (mode
));
/* Try each variant from the table in order; first match wins.  */
8372 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8373 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8375 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8376 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8378 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8379 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8381 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8382 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
8384 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
8386 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
8388 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8389 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8391 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8392 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8394 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8395 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8397 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8398 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
8400 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
8402 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
8404 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8405 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8407 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8408 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8410 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8411 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8413 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8414 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8416 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
8418 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8419 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
8427 *elementwidth
= elsize
;
8431 unsigned HOST_WIDE_INT imm
= 0;
8433 /* Un-invert bytes of recognized vector, if necessary. */
8435 for (i
= 0; i
< idx
; i
++)
8436 bytes
[i
] ^= invmask
;
/* Variant 17 (i64): re-pack each byte as all-zeros or all-ones.  */
8440 /* FIXME: Broken on 32-bit H_W_I hosts. */
8441 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8443 for (i
= 0; i
< 8; i
++)
8444 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8445 << (i
* BITS_PER_UNIT
);
8447 *modconst
= GEN_INT (imm
);
8451 unsigned HOST_WIDE_INT imm
= 0;
8453 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8454 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8456 *modconst
= GEN_INT (imm
);
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Wrapper over neon_valid_immediate for VMOV/VMVN; copies results to
   the optional out-parameters.  */
8464 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8465 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8466 float elements), and a modified constant (whatever should be output for a
8467 VMOV) in *MODCONST. */
8470 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
8471 rtx
*modconst
, int *elementwidth
)
8475 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
8481 *modconst
= tmpconst
;
8484 *elementwidth
= tmpwidth
;
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Like neon_immediate_valid_for_move, but only variants 0-5 are legal
   as the second operand of VORR/VBIC/VAND/VORN.  */
8489 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8490 the immediate is valid, write a constant suitable for using as an operand
8491 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8492 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8495 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
8496 rtx
*modconst
, int *elementwidth
)
8500 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
8502 if (retval
< 0 || retval
> 5)
8506 *modconst
= tmpconst
;
8509 *elementwidth
= tmpwidth
;
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Formats the assembler template for a Neon immediate logic op (MNEM),
   rewriting *OP2 to the encodable constant; QUAD selects the q- vs
   d-register operand template.  Returns a pointer to a static buffer.  */
8514 /* Return a string suitable for output of Neon immediate logic operation
8518 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
8519 int inverse
, int quad
)
8521 int width
, is_valid
;
8522 static char templ
[40];
8524 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
8526 gcc_assert (is_valid
!= 0);
8529 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
8531 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
/* NOTE(review): damaged extraction -- tokens preserved verbatim.
   Emits log2(parts) pairwise-reduction insns via REDUC, halving the
   active element count each step; the final step targets OP0.  */
8536 /* Output a sequence of pairwise operations to implement a reduction.
8537 NOTE: We do "too much work" here, because pairwise operations work on two
8538 registers-worth of operands in one go. Unfortunately we can't exploit those
8539 extra calculations to do the full operation in fewer steps, I don't think.
8540 Although all vector elements of the result but the first are ignored, we
8541 actually calculate the same result in each of the elements. An alternative
8542 such as initially loading a vector with zero to use as each of the second
8543 operands would use up an additional register and take an extra instruction,
8544 for no particular gain. */
8547 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
8548 rtx (*reduc
) (rtx
, rtx
, rtx
))
8550 enum machine_mode inner
= GET_MODE_INNER (mode
);
8551 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
8554 for (i
= parts
/ 2; i
>= 1; i
/= 2)
8556 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
8557 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
8562 /* If VALS is a vector constant that can be loaded into a register
8563 using VDUP, generate instructions to do so and return an RTX to
8564 assign to the register. Otherwise return NULL_RTX. */
8567 neon_vdup_constant (rtx vals
)
8569 enum machine_mode mode
= GET_MODE (vals
);
8570 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8571 int n_elts
= GET_MODE_NUNITS (mode
);
8572 bool all_same
= true;
8576 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
8579 for (i
= 0; i
< n_elts
; ++i
)
8581 x
= XVECEXP (vals
, 0, i
);
8582 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8587 /* The elements are not all the same. We could handle repeating
8588 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8589 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8593 /* We can load this constant by using VDUP and a constant in a
8594 single ARM register. This will be cheaper than a vector
8597 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8598 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8601 /* Generate code to load VALS, which is a PARALLEL containing only
8602 constants (for vec_init) or CONST_VECTOR, efficiently into a
8603 register. Returns an RTX to copy into the register, or NULL_RTX
8604 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8607 neon_make_constant (rtx vals
)
8609 enum machine_mode mode
= GET_MODE (vals
);
8611 rtx const_vec
= NULL_RTX
;
8612 int n_elts
= GET_MODE_NUNITS (mode
);
8616 if (GET_CODE (vals
) == CONST_VECTOR
)
8618 else if (GET_CODE (vals
) == PARALLEL
)
8620 /* A CONST_VECTOR must contain only CONST_INTs and
8621 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8622 Only store valid constants in a CONST_VECTOR. */
8623 for (i
= 0; i
< n_elts
; ++i
)
8625 rtx x
= XVECEXP (vals
, 0, i
);
8626 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8629 if (n_const
== n_elts
)
8630 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8635 if (const_vec
!= NULL
8636 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
8637 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8639 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
8640 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8641 pipeline cycle; creating the constant takes one or two ARM
8644 else if (const_vec
!= NULL_RTX
)
8645 /* Load from constant pool. On Cortex-A8 this takes two cycles
8646 (for either double or quad vectors). We can not take advantage
8647 of single-cycle VLD1 because we need a PC-relative addressing
8651 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8652 We can not construct an initializer. */
8656 /* Initialize vector TARGET to VALS. */
8659 neon_expand_vector_init (rtx target
, rtx vals
)
8661 enum machine_mode mode
= GET_MODE (target
);
8662 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8663 int n_elts
= GET_MODE_NUNITS (mode
);
8664 int n_var
= 0, one_var
= -1;
8665 bool all_same
= true;
8669 for (i
= 0; i
< n_elts
; ++i
)
8671 x
= XVECEXP (vals
, 0, i
);
8672 if (!CONSTANT_P (x
))
8673 ++n_var
, one_var
= i
;
8675 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8681 rtx constant
= neon_make_constant (vals
);
8682 if (constant
!= NULL_RTX
)
8684 emit_move_insn (target
, constant
);
8689 /* Splat a single non-constant element if we can. */
8690 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
8692 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8693 emit_insn (gen_rtx_SET (VOIDmode
, target
,
8694 gen_rtx_VEC_DUPLICATE (mode
, x
)));
8698 /* One field is non-constant. Load constant then overwrite varying
8699 field. This is more efficient than using the stack. */
8702 rtx copy
= copy_rtx (vals
);
8703 rtx index
= GEN_INT (one_var
);
8705 /* Load constant part of vector, substitute neighboring value for
8707 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
8708 neon_expand_vector_init (target
, copy
);
8710 /* Insert variable. */
8711 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8715 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
8718 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
8721 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
8724 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
8727 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
8730 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
8733 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
8736 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
8739 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
8747 /* Construct the vector in memory one field at a time
8748 and load the whole vector. */
8749 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), 0);
8750 for (i
= 0; i
< n_elts
; i
++)
8751 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8752 i
* GET_MODE_SIZE (inner_mode
)),
8753 XVECEXP (vals
, 0, i
));
8754 emit_move_insn (target
, mem
);
8757 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8758 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8759 reported source locations are bogus. */
8762 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8767 gcc_assert (GET_CODE (operand
) == CONST_INT
);
8769 lane
= INTVAL (operand
);
8771 if (lane
< low
|| lane
>= high
)
8775 /* Bounds-check lanes. */
8778 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8780 bounds_check (operand
, low
, high
, "lane out of range");
8783 /* Bounds-check constants. */
8786 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8788 bounds_check (operand
, low
, high
, "constant out of range");
8792 neon_element_bits (enum machine_mode mode
)
8795 return GET_MODE_BITSIZE (mode
);
8797 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
8801 /* Predicates for `match_operand' and `match_operator'. */
8803 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8805 cirrus_memory_offset (rtx op
)
8807 /* Reject eliminable registers. */
8808 if (! (reload_in_progress
|| reload_completed
)
8809 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8810 || reg_mentioned_p (arg_pointer_rtx
, op
)
8811 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8812 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8813 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8814 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8817 if (GET_CODE (op
) == MEM
)
8823 /* Match: (mem (reg)). */
8824 if (GET_CODE (ind
) == REG
)
8830 if (GET_CODE (ind
) == PLUS
8831 && GET_CODE (XEXP (ind
, 0)) == REG
8832 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8833 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
)
8840 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8841 WB is true if full writeback address modes are allowed and is false
8842 if limited writeback address modes (POST_INC and PRE_DEC) are
8846 arm_coproc_mem_operand (rtx op
, bool wb
)
8850 /* Reject eliminable registers. */
8851 if (! (reload_in_progress
|| reload_completed
)
8852 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8853 || reg_mentioned_p (arg_pointer_rtx
, op
)
8854 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8855 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8856 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8857 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8860 /* Constants are converted into offsets from labels. */
8861 if (GET_CODE (op
) != MEM
)
8866 if (reload_completed
8867 && (GET_CODE (ind
) == LABEL_REF
8868 || (GET_CODE (ind
) == CONST
8869 && GET_CODE (XEXP (ind
, 0)) == PLUS
8870 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8871 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8874 /* Match: (mem (reg)). */
8875 if (GET_CODE (ind
) == REG
)
8876 return arm_address_register_rtx_p (ind
, 0);
8878 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8879 acceptable in any case (subject to verification by
8880 arm_address_register_rtx_p). We need WB to be true to accept
8881 PRE_INC and POST_DEC. */
8882 if (GET_CODE (ind
) == POST_INC
8883 || GET_CODE (ind
) == PRE_DEC
8885 && (GET_CODE (ind
) == PRE_INC
8886 || GET_CODE (ind
) == POST_DEC
)))
8887 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8890 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
8891 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
8892 && GET_CODE (XEXP (ind
, 1)) == PLUS
8893 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
8894 ind
= XEXP (ind
, 1);
8899 if (GET_CODE (ind
) == PLUS
8900 && GET_CODE (XEXP (ind
, 0)) == REG
8901 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8902 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8903 && INTVAL (XEXP (ind
, 1)) > -1024
8904 && INTVAL (XEXP (ind
, 1)) < 1024
8905 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
8911 /* Return TRUE if OP is a memory operand which we can load or store a vector
8912 to/from. TYPE is one of the following values:
8913 0 - Vector load/stor (vldr)
8914 1 - Core registers (ldm)
8915 2 - Element/structure loads (vld1)
8918 neon_vector_mem_operand (rtx op
, int type
)
8922 /* Reject eliminable registers. */
8923 if (! (reload_in_progress
|| reload_completed
)
8924 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8925 || reg_mentioned_p (arg_pointer_rtx
, op
)
8926 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8927 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8928 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8929 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8932 /* Constants are converted into offsets from labels. */
8933 if (GET_CODE (op
) != MEM
)
8938 if (reload_completed
8939 && (GET_CODE (ind
) == LABEL_REF
8940 || (GET_CODE (ind
) == CONST
8941 && GET_CODE (XEXP (ind
, 0)) == PLUS
8942 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8943 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8946 /* Match: (mem (reg)). */
8947 if (GET_CODE (ind
) == REG
)
8948 return arm_address_register_rtx_p (ind
, 0);
8950 /* Allow post-increment with Neon registers. */
8951 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
8952 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
8953 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8955 /* FIXME: vld1 allows register post-modify. */
8961 && GET_CODE (ind
) == PLUS
8962 && GET_CODE (XEXP (ind
, 0)) == REG
8963 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8964 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8965 && INTVAL (XEXP (ind
, 1)) > -1024
8966 && INTVAL (XEXP (ind
, 1)) < 1016
8967 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
8973 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8976 neon_struct_mem_operand (rtx op
)
8980 /* Reject eliminable registers. */
8981 if (! (reload_in_progress
|| reload_completed
)
8982 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8983 || reg_mentioned_p (arg_pointer_rtx
, op
)
8984 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8985 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8986 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8987 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8990 /* Constants are converted into offsets from labels. */
8991 if (GET_CODE (op
) != MEM
)
8996 if (reload_completed
8997 && (GET_CODE (ind
) == LABEL_REF
8998 || (GET_CODE (ind
) == CONST
8999 && GET_CODE (XEXP (ind
, 0)) == PLUS
9000 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9001 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
9004 /* Match: (mem (reg)). */
9005 if (GET_CODE (ind
) == REG
)
9006 return arm_address_register_rtx_p (ind
, 0);
9008 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9009 if (GET_CODE (ind
) == POST_INC
9010 || GET_CODE (ind
) == PRE_DEC
)
9011 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9016 /* Return true if X is a register that will be eliminated later on. */
9018 arm_eliminable_register (rtx x
)
9020 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
9021 || REGNO (x
) == ARG_POINTER_REGNUM
9022 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
9023 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
9026 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9027 coprocessor registers. Otherwise return NO_REGS. */
9030 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
9034 if (!TARGET_NEON_FP16
)
9035 return GENERAL_REGS
;
9036 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
9038 return GENERAL_REGS
;
9041 /* The neon move patterns handle all legitimate vector and struct
9045 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
9046 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
9047 || VALID_NEON_STRUCT_MODE (mode
)))
9050 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
9053 return GENERAL_REGS
;
9056 /* Values which must be returned in the most-significant end of the return
9060 arm_return_in_msb (const_tree valtype
)
9062 return (TARGET_AAPCS_BASED
9064 && (AGGREGATE_TYPE_P (valtype
)
9065 || TREE_CODE (valtype
) == COMPLEX_TYPE
));
9068 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9069 Use by the Cirrus Maverick code which has to workaround
9070 a hardware bug triggered by such instructions. */
9072 arm_memory_load_p (rtx insn
)
9074 rtx body
, lhs
, rhs
;;
9076 if (insn
== NULL_RTX
|| GET_CODE (insn
) != INSN
)
9079 body
= PATTERN (insn
);
9081 if (GET_CODE (body
) != SET
)
9084 lhs
= XEXP (body
, 0);
9085 rhs
= XEXP (body
, 1);
9087 lhs
= REG_OR_SUBREG_RTX (lhs
);
9089 /* If the destination is not a general purpose
9090 register we do not have to worry. */
9091 if (GET_CODE (lhs
) != REG
9092 || REGNO_REG_CLASS (REGNO (lhs
)) != GENERAL_REGS
)
9095 /* As well as loads from memory we also have to react
9096 to loads of invalid constants which will be turned
9097 into loads from the minipool. */
9098 return (GET_CODE (rhs
) == MEM
9099 || GET_CODE (rhs
) == SYMBOL_REF
9100 || note_invalid_constants (insn
, -1, false));
9103 /* Return TRUE if INSN is a Cirrus instruction. */
9105 arm_cirrus_insn_p (rtx insn
)
9107 enum attr_cirrus attr
;
9109 /* get_attr cannot accept USE or CLOBBER. */
9111 || GET_CODE (insn
) != INSN
9112 || GET_CODE (PATTERN (insn
)) == USE
9113 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
9116 attr
= get_attr_cirrus (insn
);
9118 return attr
!= CIRRUS_NOT
;
9121 /* Cirrus reorg for invalid instruction combinations. */
9123 cirrus_reorg (rtx first
)
9125 enum attr_cirrus attr
;
9126 rtx body
= PATTERN (first
);
9130 /* Any branch must be followed by 2 non Cirrus instructions. */
9131 if (GET_CODE (first
) == JUMP_INSN
&& GET_CODE (body
) != RETURN
)
9134 t
= next_nonnote_insn (first
);
9136 if (arm_cirrus_insn_p (t
))
9139 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9143 emit_insn_after (gen_nop (), first
);
9148 /* (float (blah)) is in parallel with a clobber. */
9149 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
9150 body
= XVECEXP (body
, 0, 0);
9152 if (GET_CODE (body
) == SET
)
9154 rtx lhs
= XEXP (body
, 0), rhs
= XEXP (body
, 1);
9156 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9157 be followed by a non Cirrus insn. */
9158 if (get_attr_cirrus (first
) == CIRRUS_DOUBLE
)
9160 if (arm_cirrus_insn_p (next_nonnote_insn (first
)))
9161 emit_insn_after (gen_nop (), first
);
9165 else if (arm_memory_load_p (first
))
9167 unsigned int arm_regno
;
9169 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9170 ldr/cfmv64hr combination where the Rd field is the same
9171 in both instructions must be split with a non Cirrus
9178 /* Get Arm register number for ldr insn. */
9179 if (GET_CODE (lhs
) == REG
)
9180 arm_regno
= REGNO (lhs
);
9183 gcc_assert (GET_CODE (rhs
) == REG
);
9184 arm_regno
= REGNO (rhs
);
9188 first
= next_nonnote_insn (first
);
9190 if (! arm_cirrus_insn_p (first
))
9193 body
= PATTERN (first
);
9195 /* (float (blah)) is in parallel with a clobber. */
9196 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0))
9197 body
= XVECEXP (body
, 0, 0);
9199 if (GET_CODE (body
) == FLOAT
)
9200 body
= XEXP (body
, 0);
9202 if (get_attr_cirrus (first
) == CIRRUS_MOVE
9203 && GET_CODE (XEXP (body
, 1)) == REG
9204 && arm_regno
== REGNO (XEXP (body
, 1)))
9205 emit_insn_after (gen_nop (), first
);
9211 /* get_attr cannot accept USE or CLOBBER. */
9213 || GET_CODE (first
) != INSN
9214 || GET_CODE (PATTERN (first
)) == USE
9215 || GET_CODE (PATTERN (first
)) == CLOBBER
)
9218 attr
= get_attr_cirrus (first
);
9220 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9221 must be followed by a non-coprocessor instruction. */
9222 if (attr
== CIRRUS_COMPARE
)
9226 t
= next_nonnote_insn (first
);
9228 if (arm_cirrus_insn_p (t
))
9231 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9235 emit_insn_after (gen_nop (), first
);
9241 /* Return TRUE if X references a SYMBOL_REF. */
9243 symbol_mentioned_p (rtx x
)
9248 if (GET_CODE (x
) == SYMBOL_REF
)
9251 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9252 are constant offsets, not symbols. */
9253 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9256 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9258 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9264 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9265 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
9268 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
9275 /* Return TRUE if X references a LABEL_REF. */
9277 label_mentioned_p (rtx x
)
9282 if (GET_CODE (x
) == LABEL_REF
)
9285 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9286 instruction, but they are constant offsets, not symbols. */
9287 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9290 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9291 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9297 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9298 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
9301 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
9309 tls_mentioned_p (rtx x
)
9311 switch (GET_CODE (x
))
9314 return tls_mentioned_p (XEXP (x
, 0));
9317 if (XINT (x
, 1) == UNSPEC_TLS
)
9325 /* Must not copy any rtx that uses a pc-relative address. */
9328 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
9330 if (GET_CODE (*x
) == UNSPEC
9331 && XINT (*x
, 1) == UNSPEC_PIC_BASE
)
9337 arm_cannot_copy_insn_p (rtx insn
)
9339 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
9345 enum rtx_code code
= GET_CODE (x
);
9362 /* Return 1 if memory locations are adjacent. */
9364 adjacent_mem_locations (rtx a
, rtx b
)
9366 /* We don't guarantee to preserve the order of these memory refs. */
9367 if (volatile_refs_p (a
) || volatile_refs_p (b
))
9370 if ((GET_CODE (XEXP (a
, 0)) == REG
9371 || (GET_CODE (XEXP (a
, 0)) == PLUS
9372 && GET_CODE (XEXP (XEXP (a
, 0), 1)) == CONST_INT
))
9373 && (GET_CODE (XEXP (b
, 0)) == REG
9374 || (GET_CODE (XEXP (b
, 0)) == PLUS
9375 && GET_CODE (XEXP (XEXP (b
, 0), 1)) == CONST_INT
)))
9377 HOST_WIDE_INT val0
= 0, val1
= 0;
9381 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
9383 reg0
= XEXP (XEXP (a
, 0), 0);
9384 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
9389 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
9391 reg1
= XEXP (XEXP (b
, 0), 0);
9392 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
9397 /* Don't accept any offset that will require multiple
9398 instructions to handle, since this would cause the
9399 arith_adjacentmem pattern to output an overlong sequence. */
9400 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
9403 /* Don't allow an eliminable register: register elimination can make
9404 the offset too large. */
9405 if (arm_eliminable_register (reg0
))
9408 val_diff
= val1
- val0
;
9412 /* If the target has load delay slots, then there's no benefit
9413 to using an ldm instruction unless the offset is zero and
9414 we are optimizing for size. */
9415 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
9416 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
9417 && (val_diff
== 4 || val_diff
== -4));
9420 return ((REGNO (reg0
) == REGNO (reg1
))
9421 && (val_diff
== 4 || val_diff
== -4));
9427 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9428 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9429 instruction. ADD_OFFSET is nonzero if the base address register needs
9430 to be modified with an add instruction before we can use it. */
9433 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
9434 int nops
, HOST_WIDE_INT add_offset
)
9436 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9437 if the offset isn't small enough. The reason 2 ldrs are faster
9438 is because these ARMs are able to do more than one cache access
9439 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9440 whilst the ARM8 has a double bandwidth cache. This means that
9441 these cores can do both an instruction fetch and a data fetch in
9442 a single cycle, so the trick of calculating the address into a
9443 scratch register (one of the result regs) and then doing a load
9444 multiple actually becomes slower (and no smaller in code size).
9445 That is the transformation
9447 ldr rd1, [rbase + offset]
9448 ldr rd2, [rbase + offset + 4]
9452 add rd1, rbase, offset
9453 ldmia rd1, {rd1, rd2}
9455 produces worse code -- '3 cycles + any stalls on rd2' instead of
9456 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9457 access per cycle, the first sequence could never complete in less
9458 than 6 cycles, whereas the ldm sequence would only take 5 and
9459 would make better use of sequential accesses if not hitting the
9462 We cheat here and test 'arm_ld_sched' which we currently know to
9463 only be true for the ARM8, ARM9 and StrongARM. If this ever
9464 changes, then the test below needs to be reworked. */
9465 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
9468 /* XScale has load-store double instructions, but they have stricter
9469 alignment requirements than load-store multiple, so we cannot
9472 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9473 the pipeline until completion.
9481 An ldr instruction takes 1-3 cycles, but does not block the
9490 Best case ldr will always win. However, the more ldr instructions
9491 we issue, the less likely we are to be able to schedule them well.
9492 Using ldr instructions also increases code size.
9494 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9495 for counts of 3 or 4 regs. */
9496 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
9501 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9502 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9503 an array ORDER which describes the sequence to use when accessing the
9504 offsets that produces an ascending order. In this sequence, each
9505 offset must be larger by exactly 4 than the previous one. ORDER[0]
9506 must have been filled in with the lowest offset by the caller.
9507 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9508 we use to verify that ORDER produces an ascending order of registers.
9509 Return true if it was possible to construct such an order, false if
9513 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
9517 for (i
= 1; i
< nops
; i
++)
9521 order
[i
] = order
[i
- 1];
9522 for (j
= 0; j
< nops
; j
++)
9523 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
9525 /* We must find exactly one offset that is higher than the
9526 previous one by 4. */
9527 if (order
[i
] != order
[i
- 1])
9531 if (order
[i
] == order
[i
- 1])
9533 /* The register numbers must be ascending. */
9534 if (unsorted_regs
!= NULL
9535 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
9541 /* Used to determine in a peephole whether a sequence of load
9542 instructions can be changed into a load-multiple instruction.
9543 NOPS is the number of separate load instructions we are examining. The
9544 first NOPS entries in OPERANDS are the destination registers, the
9545 next NOPS entries are memory operands. If this function is
9546 successful, *BASE is set to the common base register of the memory
9547 accesses; *LOAD_OFFSET is set to the first memory location's offset
9548 from that base register.
9549 REGS is an array filled in with the destination register numbers.
9550 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9551 insn numbers to an ascending order of stores. If CHECK_REGS is true,
9552 the sequence of registers in REGS matches the loads from ascending memory
9553 locations, and the function verifies that the register numbers are
9554 themselves ascending. If CHECK_REGS is false, the register numbers
9555 are stored in the order they are found in the operands. */
9557 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
9558 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
9560 int unsorted_regs
[MAX_LDM_STM_OPS
];
9561 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9562 int order
[MAX_LDM_STM_OPS
];
9563 rtx base_reg_rtx
= NULL
;
9567 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9568 easily extended if required. */
9569 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9571 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9573 /* Loop over the operands and check that the memory references are
9574 suitable (i.e. immediate offsets from the same base register). At
9575 the same time, extract the target register, and the memory
9577 for (i
= 0; i
< nops
; i
++)
9582 /* Convert a subreg of a mem into the mem itself. */
9583 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9584 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9586 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9588 /* Don't reorder volatile memory references; it doesn't seem worth
9589 looking for the case where the order is ok anyway. */
9590 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9593 offset
= const0_rtx
;
9595 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9596 || (GET_CODE (reg
) == SUBREG
9597 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9598 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9599 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9601 || (GET_CODE (reg
) == SUBREG
9602 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9603 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9608 base_reg
= REGNO (reg
);
9610 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9613 else if (base_reg
!= (int) REGNO (reg
))
9614 /* Not addressed from the same base register. */
9617 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9618 ? REGNO (operands
[i
])
9619 : REGNO (SUBREG_REG (operands
[i
])));
9621 /* If it isn't an integer register, or if it overwrites the
9622 base register but isn't the last insn in the list, then
9623 we can't do this. */
9624 if (unsorted_regs
[i
] < 0
9625 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
9626 || unsorted_regs
[i
] > 14
9627 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
9630 unsorted_offsets
[i
] = INTVAL (offset
);
9631 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9635 /* Not a suitable memory address. */
9639 /* All the useful information has now been extracted from the
9640 operands into unsorted_regs and unsorted_offsets; additionally,
9641 order[0] has been set to the lowest offset in the list. Sort
9642 the offsets into order, verifying that they are adjacent, and
9643 check that the register numbers are ascending. */
9644 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
9645 check_regs
? unsorted_regs
: NULL
))
9649 memcpy (saved_order
, order
, sizeof order
);
9655 for (i
= 0; i
< nops
; i
++)
9656 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
9658 *load_offset
= unsorted_offsets
[order
[0]];
9662 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
9665 if (unsorted_offsets
[order
[0]] == 0)
9666 ldm_case
= 1; /* ldmia */
9667 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9668 ldm_case
= 2; /* ldmib */
9669 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9670 ldm_case
= 3; /* ldmda */
9671 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
9672 ldm_case
= 4; /* ldmdb */
9673 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
9674 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
9679 if (!multiple_operation_profitable_p (false, nops
,
9681 ? unsorted_offsets
[order
[0]] : 0))
9687 /* Used to determine in a peephole whether a sequence of store instructions can
9688 be changed into a store-multiple instruction.
9689 NOPS is the number of separate store instructions we are examining.
9690 NOPS_TOTAL is the total number of instructions recognized by the peephole
9692 The first NOPS entries in OPERANDS are the source registers, the next
9693 NOPS entries are memory operands. If this function is successful, *BASE is
9694 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9695 to the first memory location's offset from that base register. REGS is an
9696 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9697 likewise filled with the corresponding rtx's.
9698 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
9699 numbers to an ascending order of stores.
9700 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9701 from ascending memory locations, and the function verifies that the register
9702 numbers are themselves ascending. If CHECK_REGS is false, the register
9703 numbers are stored in the order they are found in the operands. */
9705 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
9706 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
9707 HOST_WIDE_INT
*load_offset
, bool check_regs
)
9709 int unsorted_regs
[MAX_LDM_STM_OPS
];
9710 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
9711 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9712 int order
[MAX_LDM_STM_OPS
];
9714 rtx base_reg_rtx
= NULL
;
9717 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9718 easily extended if required. */
9719 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9721 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9723 /* Loop over the operands and check that the memory references are
9724 suitable (i.e. immediate offsets from the same base register). At
9725 the same time, extract the target register, and the memory
9727 for (i
= 0; i
< nops
; i
++)
9732 /* Convert a subreg of a mem into the mem itself. */
9733 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9734 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9736 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9738 /* Don't reorder volatile memory references; it doesn't seem worth
9739 looking for the case where the order is ok anyway. */
9740 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9743 offset
= const0_rtx
;
9745 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9746 || (GET_CODE (reg
) == SUBREG
9747 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9748 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9749 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9751 || (GET_CODE (reg
) == SUBREG
9752 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9753 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9756 unsorted_reg_rtxs
[i
] = (GET_CODE (operands
[i
]) == REG
9757 ? operands
[i
] : SUBREG_REG (operands
[i
]));
9758 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
9762 base_reg
= REGNO (reg
);
9764 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9767 else if (base_reg
!= (int) REGNO (reg
))
9768 /* Not addressed from the same base register. */
9771 /* If it isn't an integer register, then we can't do this. */
9772 if (unsorted_regs
[i
] < 0
9773 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
9774 || (TARGET_THUMB2
&& unsorted_regs
[i
] == base_reg
)
9775 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
9776 || unsorted_regs
[i
] > 14)
9779 unsorted_offsets
[i
] = INTVAL (offset
);
9780 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9784 /* Not a suitable memory address. */
9788 /* All the useful information has now been extracted from the
9789 operands into unsorted_regs and unsorted_offsets; additionally,
9790 order[0] has been set to the lowest offset in the list. Sort
9791 the offsets into order, verifying that they are adjacent, and
9792 check that the register numbers are ascending. */
9793 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
9794 check_regs
? unsorted_regs
: NULL
))
9798 memcpy (saved_order
, order
, sizeof order
);
9804 for (i
= 0; i
< nops
; i
++)
9806 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
9808 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
9811 *load_offset
= unsorted_offsets
[order
[0]];
9815 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
9818 if (unsorted_offsets
[order
[0]] == 0)
9819 stm_case
= 1; /* stmia */
9820 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9821 stm_case
= 2; /* stmib */
9822 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9823 stm_case
= 3; /* stmda */
9824 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
9825 stm_case
= 4; /* stmdb */
9829 if (!multiple_operation_profitable_p (false, nops
, 0))
9835 /* Routines for use in generating RTL. */
9837 /* Generate a load-multiple instruction. COUNT is the number of loads in
9838 the instruction; REGS and MEMS are arrays containing the operands.
9839 BASEREG is the base register to be used in addressing the memory operands.
9840 WBACK_OFFSET is nonzero if the instruction should update the base
9844 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
9845 HOST_WIDE_INT wback_offset
)
9850 if (!multiple_operation_profitable_p (false, count
, 0))
9856 for (i
= 0; i
< count
; i
++)
9857 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
9859 if (wback_offset
!= 0)
9860 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
9868 result
= gen_rtx_PARALLEL (VOIDmode
,
9869 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
9870 if (wback_offset
!= 0)
9872 XVECEXP (result
, 0, 0)
9873 = gen_rtx_SET (VOIDmode
, basereg
,
9874 plus_constant (basereg
, wback_offset
));
9879 for (j
= 0; i
< count
; i
++, j
++)
9880 XVECEXP (result
, 0, i
)
9881 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
9886 /* Generate a store-multiple instruction. COUNT is the number of stores in
9887 the instruction; REGS and MEMS are arrays containing the operands.
9888 BASEREG is the base register to be used in addressing the memory operands.
9889 WBACK_OFFSET is nonzero if the instruction should update the base
9893 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
9894 HOST_WIDE_INT wback_offset
)
9899 if (GET_CODE (basereg
) == PLUS
)
9900 basereg
= XEXP (basereg
, 0);
9902 if (!multiple_operation_profitable_p (false, count
, 0))
9908 for (i
= 0; i
< count
; i
++)
9909 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
9911 if (wback_offset
!= 0)
9912 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
9920 result
= gen_rtx_PARALLEL (VOIDmode
,
9921 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
9922 if (wback_offset
!= 0)
9924 XVECEXP (result
, 0, 0)
9925 = gen_rtx_SET (VOIDmode
, basereg
,
9926 plus_constant (basereg
, wback_offset
));
9931 for (j
= 0; i
< count
; i
++, j
++)
9932 XVECEXP (result
, 0, i
)
9933 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
9938 /* Generate either a load-multiple or a store-multiple instruction. This
9939 function can be used in situations where we can start with a single MEM
9940 rtx and adjust its address upwards.
9941 COUNT is the number of operations in the instruction, not counting a
9942 possible update of the base register. REGS is an array containing the
9944 BASEREG is the base register to be used in addressing the memory operands,
9945 which are constructed from BASEMEM.
9946 WRITE_BACK specifies whether the generated instruction should include an
9947 update of the base register.
9948 OFFSETP is used to pass an offset to and from this function; this offset
9949 is not used when constructing the address (instead BASEMEM should have an
9950 appropriate offset in its address), it is used only for setting
9951 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
9954 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
9955 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
9957 rtx mems
[MAX_LDM_STM_OPS
];
9958 HOST_WIDE_INT offset
= *offsetp
;
9961 gcc_assert (count
<= MAX_LDM_STM_OPS
);
9963 if (GET_CODE (basereg
) == PLUS
)
9964 basereg
= XEXP (basereg
, 0);
9966 for (i
= 0; i
< count
; i
++)
9968 rtx addr
= plus_constant (basereg
, i
* 4);
9969 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
9977 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
9978 write_back
? 4 * count
: 0);
9980 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
9981 write_back
? 4 * count
: 0);
9985 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
9986 rtx basemem
, HOST_WIDE_INT
*offsetp
)
9988 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
9993 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
9994 rtx basemem
, HOST_WIDE_INT
*offsetp
)
9996 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
10000 /* Called from a peephole2 expander to turn a sequence of loads into an
10001 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10002 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10003 is true if we can reorder the registers because they are used commutatively
10005 Returns true iff we could generate a new instruction. */
10008 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
10010 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10011 rtx mems
[MAX_LDM_STM_OPS
];
10012 int i
, j
, base_reg
;
10014 HOST_WIDE_INT offset
;
10015 int write_back
= FALSE
;
10019 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
10020 &base_reg
, &offset
, !sort_regs
);
10026 for (i
= 0; i
< nops
- 1; i
++)
10027 for (j
= i
+ 1; j
< nops
; j
++)
10028 if (regs
[i
] > regs
[j
])
10034 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10038 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
10039 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
10045 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
10046 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
10048 if (!TARGET_THUMB1
)
10050 base_reg
= regs
[0];
10051 base_reg_rtx
= newbase
;
10055 for (i
= 0; i
< nops
; i
++)
10057 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10058 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10061 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10062 write_back
? offset
+ i
* 4 : 0));
10066 /* Called from a peephole2 expander to turn a sequence of stores into an
10067 STM instruction. OPERANDS are the operands found by the peephole matcher;
10068 NOPS indicates how many separate stores we are trying to combine.
10069 Returns true iff we could generate a new instruction. */
10072 gen_stm_seq (rtx
*operands
, int nops
)
10075 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10076 rtx mems
[MAX_LDM_STM_OPS
];
10079 HOST_WIDE_INT offset
;
10080 int write_back
= FALSE
;
10083 bool base_reg_dies
;
10085 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
10086 mem_order
, &base_reg
, &offset
, true);
10091 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10093 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
10096 gcc_assert (base_reg_dies
);
10102 gcc_assert (base_reg_dies
);
10103 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10107 addr
= plus_constant (base_reg_rtx
, offset
);
10109 for (i
= 0; i
< nops
; i
++)
10111 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10112 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10115 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10116 write_back
? offset
+ i
* 4 : 0));
10120 /* Called from a peephole2 expander to turn a sequence of stores that are
10121 preceded by constant loads into an STM instruction. OPERANDS are the
10122 operands found by the peephole matcher; NOPS indicates how many
10123 separate stores we are trying to combine; there are 2 * NOPS
10124 instructions in the peephole.
10125 Returns true iff we could generate a new instruction. */
10128 gen_const_stm_seq (rtx
*operands
, int nops
)
10130 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
10131 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10132 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
10133 rtx mems
[MAX_LDM_STM_OPS
];
10136 HOST_WIDE_INT offset
;
10137 int write_back
= FALSE
;
10140 bool base_reg_dies
;
10142 HARD_REG_SET allocated
;
10144 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
10145 mem_order
, &base_reg
, &offset
, false);
10150 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
10152 /* If the same register is used more than once, try to find a free
10154 CLEAR_HARD_REG_SET (allocated
);
10155 for (i
= 0; i
< nops
; i
++)
10157 for (j
= i
+ 1; j
< nops
; j
++)
10158 if (regs
[i
] == regs
[j
])
10160 rtx t
= peep2_find_free_register (0, nops
* 2,
10161 TARGET_THUMB1
? "l" : "r",
10162 SImode
, &allocated
);
10166 regs
[i
] = REGNO (t
);
10170 /* Compute an ordering that maps the register numbers to an ascending
10173 for (i
= 0; i
< nops
; i
++)
10174 if (regs
[i
] < regs
[reg_order
[0]])
10177 for (i
= 1; i
< nops
; i
++)
10179 int this_order
= reg_order
[i
- 1];
10180 for (j
= 0; j
< nops
; j
++)
10181 if (regs
[j
] > regs
[reg_order
[i
- 1]]
10182 && (this_order
== reg_order
[i
- 1]
10183 || regs
[j
] < regs
[this_order
]))
10185 reg_order
[i
] = this_order
;
10188 /* Ensure that registers that must be live after the instruction end
10189 up with the correct value. */
10190 for (i
= 0; i
< nops
; i
++)
10192 int this_order
= reg_order
[i
];
10193 if ((this_order
!= mem_order
[i
]
10194 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
10195 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
10199 /* Load the constants. */
10200 for (i
= 0; i
< nops
; i
++)
10202 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
10203 sorted_regs
[i
] = regs
[reg_order
[i
]];
10204 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
10207 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10209 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
10212 gcc_assert (base_reg_dies
);
10218 gcc_assert (base_reg_dies
);
10219 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10223 addr
= plus_constant (base_reg_rtx
, offset
);
10225 for (i
= 0; i
< nops
; i
++)
10227 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10228 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10231 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
10232 write_back
? offset
+ i
* 4 : 0));
10237 arm_gen_movmemqi (rtx
*operands
)
10239 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
10240 HOST_WIDE_INT srcoffset
, dstoffset
;
10242 rtx src
, dst
, srcbase
, dstbase
;
10243 rtx part_bytes_reg
= NULL
;
10246 if (GET_CODE (operands
[2]) != CONST_INT
10247 || GET_CODE (operands
[3]) != CONST_INT
10248 || INTVAL (operands
[2]) > 64
10249 || INTVAL (operands
[3]) & 3)
10252 dstbase
= operands
[0];
10253 srcbase
= operands
[1];
10255 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
10256 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
10258 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
10259 out_words_to_go
= INTVAL (operands
[2]) / 4;
10260 last_bytes
= INTVAL (operands
[2]) & 3;
10261 dstoffset
= srcoffset
= 0;
10263 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
10264 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
10266 for (i
= 0; in_words_to_go
>= 2; i
+=4)
10268 if (in_words_to_go
> 4)
10269 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
10270 TRUE
, srcbase
, &srcoffset
));
10272 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
10273 src
, FALSE
, srcbase
,
10276 if (out_words_to_go
)
10278 if (out_words_to_go
> 4)
10279 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
10280 TRUE
, dstbase
, &dstoffset
));
10281 else if (out_words_to_go
!= 1)
10282 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
10283 out_words_to_go
, dst
,
10286 dstbase
, &dstoffset
));
10289 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10290 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
10291 if (last_bytes
!= 0)
10293 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
10299 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
10300 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
10303 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10304 if (out_words_to_go
)
10308 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10309 sreg
= copy_to_reg (mem
);
10311 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10312 emit_move_insn (mem
, sreg
);
10315 gcc_assert (!in_words_to_go
); /* Sanity check */
10318 if (in_words_to_go
)
10320 gcc_assert (in_words_to_go
> 0);
10322 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10323 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
10326 gcc_assert (!last_bytes
|| part_bytes_reg
);
10328 if (BYTES_BIG_ENDIAN
&& last_bytes
)
10330 rtx tmp
= gen_reg_rtx (SImode
);
10332 /* The bytes we want are in the top end of the word. */
10333 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
10334 GEN_INT (8 * (4 - last_bytes
))));
10335 part_bytes_reg
= tmp
;
10339 mem
= adjust_automodify_address (dstbase
, QImode
,
10340 plus_constant (dst
, last_bytes
- 1),
10341 dstoffset
+ last_bytes
- 1);
10342 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10346 tmp
= gen_reg_rtx (SImode
);
10347 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
10348 part_bytes_reg
= tmp
;
10355 if (last_bytes
> 1)
10357 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
10358 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
10362 rtx tmp
= gen_reg_rtx (SImode
);
10363 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
10364 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
10365 part_bytes_reg
= tmp
;
10372 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
10373 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10380 /* Select a dominance comparison mode if possible for a test of the general
10381 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10382 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10383 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10384 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10385 In all cases OP will be either EQ or NE, but we don't need to know which
10386 here. If we are unable to support a dominance comparison we return
10387 CC mode. This will then fail to match for the RTL expressions that
10388 generate this call. */
10390 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
10392 enum rtx_code cond1
, cond2
;
10395 /* Currently we will probably get the wrong result if the individual
10396 comparisons are not simple. This also ensures that it is safe to
10397 reverse a comparison if necessary. */
10398 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
10400 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
10404 /* The if_then_else variant of this tests the second condition if the
10405 first passes, but is true if the first fails. Reverse the first
10406 condition to get a true "inclusive-or" expression. */
10407 if (cond_or
== DOM_CC_NX_OR_Y
)
10408 cond1
= reverse_condition (cond1
);
10410 /* If the comparisons are not equal, and one doesn't dominate the other,
10411 then we can't do this. */
10413 && !comparison_dominates_p (cond1
, cond2
)
10414 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
10419 enum rtx_code temp
= cond1
;
10427 if (cond_or
== DOM_CC_X_AND_Y
)
10432 case EQ
: return CC_DEQmode
;
10433 case LE
: return CC_DLEmode
;
10434 case LEU
: return CC_DLEUmode
;
10435 case GE
: return CC_DGEmode
;
10436 case GEU
: return CC_DGEUmode
;
10437 default: gcc_unreachable ();
10441 if (cond_or
== DOM_CC_X_AND_Y
)
10453 gcc_unreachable ();
10457 if (cond_or
== DOM_CC_X_AND_Y
)
10469 gcc_unreachable ();
10473 if (cond_or
== DOM_CC_X_AND_Y
)
10474 return CC_DLTUmode
;
10479 return CC_DLTUmode
;
10481 return CC_DLEUmode
;
10485 gcc_unreachable ();
10489 if (cond_or
== DOM_CC_X_AND_Y
)
10490 return CC_DGTUmode
;
10495 return CC_DGTUmode
;
10497 return CC_DGEUmode
;
10501 gcc_unreachable ();
10504 /* The remaining cases only occur when both comparisons are the
10507 gcc_assert (cond1
== cond2
);
10511 gcc_assert (cond1
== cond2
);
10515 gcc_assert (cond1
== cond2
);
10519 gcc_assert (cond1
== cond2
);
10520 return CC_DLEUmode
;
10523 gcc_assert (cond1
== cond2
);
10524 return CC_DGEUmode
;
10527 gcc_unreachable ();
10532 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
10534 /* All floating point compares return CCFP if it is an equality
10535 comparison, and CCFPE otherwise. */
10536 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
10556 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10561 gcc_unreachable ();
10565 /* A compare with a shifted operand. Because of canonicalization, the
10566 comparison will have to be swapped when we emit the assembler. */
10567 if (GET_MODE (y
) == SImode
10568 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10569 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10570 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
10571 || GET_CODE (x
) == ROTATERT
))
10574 /* This operation is performed swapped, but since we only rely on the Z
10575 flag we don't need an additional mode. */
10576 if (GET_MODE (y
) == SImode
10577 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10578 && GET_CODE (x
) == NEG
10579 && (op
== EQ
|| op
== NE
))
10582 /* This is a special case that is used by combine to allow a
10583 comparison of a shifted byte load to be split into a zero-extend
10584 followed by a comparison of the shifted integer (only valid for
10585 equalities and unsigned inequalities). */
10586 if (GET_MODE (x
) == SImode
10587 && GET_CODE (x
) == ASHIFT
10588 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
10589 && GET_CODE (XEXP (x
, 0)) == SUBREG
10590 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
10591 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
10592 && (op
== EQ
|| op
== NE
10593 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
10594 && GET_CODE (y
) == CONST_INT
)
10597 /* A construct for a conditional compare, if the false arm contains
10598 0, then both conditions must be true, otherwise either condition
10599 must be true. Not all conditions are possible, so CCmode is
10600 returned if it can't be done. */
10601 if (GET_CODE (x
) == IF_THEN_ELSE
10602 && (XEXP (x
, 2) == const0_rtx
10603 || XEXP (x
, 2) == const1_rtx
)
10604 && COMPARISON_P (XEXP (x
, 0))
10605 && COMPARISON_P (XEXP (x
, 1)))
10606 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10607 INTVAL (XEXP (x
, 2)));
10609 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10610 if (GET_CODE (x
) == AND
10611 && (op
== EQ
|| op
== NE
)
10612 && COMPARISON_P (XEXP (x
, 0))
10613 && COMPARISON_P (XEXP (x
, 1)))
10614 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10617 if (GET_CODE (x
) == IOR
10618 && (op
== EQ
|| op
== NE
)
10619 && COMPARISON_P (XEXP (x
, 0))
10620 && COMPARISON_P (XEXP (x
, 1)))
10621 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10624 /* An operation (on Thumb) where we want to test for a single bit.
10625 This is done by shifting that bit up into the top bit of a
10626 scratch register; we can then branch on the sign bit. */
10628 && GET_MODE (x
) == SImode
10629 && (op
== EQ
|| op
== NE
)
10630 && GET_CODE (x
) == ZERO_EXTRACT
10631 && XEXP (x
, 1) == const1_rtx
)
10634 /* An operation that sets the condition codes as a side-effect, the
10635 V flag is not set correctly, so we can only use comparisons where
10636 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10638 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10639 if (GET_MODE (x
) == SImode
10641 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
10642 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
10643 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
10644 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
10645 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
10646 || GET_CODE (x
) == LSHIFTRT
10647 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10648 || GET_CODE (x
) == ROTATERT
10649 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
10650 return CC_NOOVmode
;
10652 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
10655 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
10656 && GET_CODE (x
) == PLUS
10657 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
10660 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
10662 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10664 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10671 /* A DImode comparison against zero can be implemented by
10672 or'ing the two halves together. */
10673 if (y
== const0_rtx
)
10676 /* We can do an equality test in three Thumb instructions. */
10686 /* DImode unsigned comparisons can be implemented by cmp +
10687 cmpeq without a scratch register. Not worth doing in
10698 /* DImode signed and unsigned comparisons can be implemented
10699 by cmp + sbcs with a scratch register, but that does not
10700 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10701 gcc_assert (op
!= EQ
&& op
!= NE
);
10705 gcc_unreachable ();
10712 /* X and Y are two things to compare using CODE. Emit the compare insn and
10713 return the rtx for register 0 in the proper mode. FP means this is a
10714 floating point compare: I don't think that it is needed on the arm. */
10716 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
)
10718 enum machine_mode mode
;
10720 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
10722 /* We might have X as a constant, Y as a register because of the predicates
10723 used for cmpdi. If so, force X to a register here. */
10724 if (dimode_comparison
&& !REG_P (x
))
10725 x
= force_reg (DImode
, x
);
10727 mode
= SELECT_CC_MODE (code
, x
, y
);
10728 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
10730 if (dimode_comparison
10731 && !(TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10732 && mode
!= CC_CZmode
)
10736 /* To compare two non-zero values for equality, XOR them and
10737 then compare against zero. Not used for ARM mode; there
10738 CC_CZmode is cheaper. */
10739 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
10741 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
10744 /* A scratch register is required. */
10745 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (SImode
));
10746 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
10747 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
10750 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
10755 /* Generate a sequence of insns that will generate the correct return
10756 address mask depending on the physical architecture that the program
10759 arm_gen_return_addr_mask (void)
10761 rtx reg
= gen_reg_rtx (Pmode
);
10763 emit_insn (gen_return_addr_mask (reg
));
10768 arm_reload_in_hi (rtx
*operands
)
10770 rtx ref
= operands
[1];
10772 HOST_WIDE_INT offset
= 0;
10774 if (GET_CODE (ref
) == SUBREG
)
10776 offset
= SUBREG_BYTE (ref
);
10777 ref
= SUBREG_REG (ref
);
10780 if (GET_CODE (ref
) == REG
)
10782 /* We have a pseudo which has been spilt onto the stack; there
10783 are two cases here: the first where there is a simple
10784 stack-slot replacement and a second where the stack-slot is
10785 out of range, or is used as a subreg. */
10786 if (reg_equiv_mem (REGNO (ref
)))
10788 ref
= reg_equiv_mem (REGNO (ref
));
10789 base
= find_replacement (&XEXP (ref
, 0));
10792 /* The slot is out of range, or was dressed up in a SUBREG. */
10793 base
= reg_equiv_address (REGNO (ref
));
10796 base
= find_replacement (&XEXP (ref
, 0));
10798 /* Handle the case where the address is too complex to be offset by 1. */
10799 if (GET_CODE (base
) == MINUS
10800 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10802 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10804 emit_set_insn (base_plus
, base
);
10807 else if (GET_CODE (base
) == PLUS
)
10809 /* The addend must be CONST_INT, or we would have dealt with it above. */
10810 HOST_WIDE_INT hi
, lo
;
10812 offset
+= INTVAL (XEXP (base
, 1));
10813 base
= XEXP (base
, 0);
10815 /* Rework the address into a legal sequence of insns. */
10816 /* Valid range for lo is -4095 -> 4095 */
10819 : -((-offset
) & 0xfff));
10821 /* Corner case, if lo is the max offset then we would be out of range
10822 once we have added the additional 1 below, so bump the msb into the
10823 pre-loading insn(s). */
10827 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10828 ^ (HOST_WIDE_INT
) 0x80000000)
10829 - (HOST_WIDE_INT
) 0x80000000);
10831 gcc_assert (hi
+ lo
== offset
);
10835 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10837 /* Get the base address; addsi3 knows how to handle constants
10838 that require more than one insn. */
10839 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10845 /* Operands[2] may overlap operands[0] (though it won't overlap
10846 operands[1]), that's why we asked for a DImode reg -- so we can
10847 use the bit that does not overlap. */
10848 if (REGNO (operands
[2]) == REGNO (operands
[0]))
10849 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10851 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10853 emit_insn (gen_zero_extendqisi2 (scratch
,
10854 gen_rtx_MEM (QImode
,
10855 plus_constant (base
,
10857 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10858 gen_rtx_MEM (QImode
,
10859 plus_constant (base
,
10861 if (!BYTES_BIG_ENDIAN
)
10862 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10863 gen_rtx_IOR (SImode
,
10866 gen_rtx_SUBREG (SImode
, operands
[0], 0),
10870 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10871 gen_rtx_IOR (SImode
,
10872 gen_rtx_ASHIFT (SImode
, scratch
,
10874 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
10877 /* Handle storing a half-word to memory during reload by synthesizing as two
10878 byte stores. Take care not to clobber the input values until after we
10879 have moved them somewhere safe. This code assumes that if the DImode
10880 scratch in operands[2] overlaps either the input value or output address
10881 in some way, then that value must die in this insn (we absolutely need
10882 two scratch registers for some corner cases). */
10884 arm_reload_out_hi (rtx
*operands
)
10886 rtx ref
= operands
[0];
10887 rtx outval
= operands
[1];
10889 HOST_WIDE_INT offset
= 0;
10891 if (GET_CODE (ref
) == SUBREG
)
10893 offset
= SUBREG_BYTE (ref
);
10894 ref
= SUBREG_REG (ref
);
10897 if (GET_CODE (ref
) == REG
)
10899 /* We have a pseudo which has been spilt onto the stack; there
10900 are two cases here: the first where there is a simple
10901 stack-slot replacement and a second where the stack-slot is
10902 out of range, or is used as a subreg. */
10903 if (reg_equiv_mem (REGNO (ref
)))
10905 ref
= reg_equiv_mem (REGNO (ref
));
10906 base
= find_replacement (&XEXP (ref
, 0));
10909 /* The slot is out of range, or was dressed up in a SUBREG. */
10910 base
= reg_equiv_address (REGNO (ref
));
10913 base
= find_replacement (&XEXP (ref
, 0));
10915 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10917 /* Handle the case where the address is too complex to be offset by 1. */
10918 if (GET_CODE (base
) == MINUS
10919 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10921 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10923 /* Be careful not to destroy OUTVAL. */
10924 if (reg_overlap_mentioned_p (base_plus
, outval
))
10926 /* Updating base_plus might destroy outval, see if we can
10927 swap the scratch and base_plus. */
10928 if (!reg_overlap_mentioned_p (scratch
, outval
))
10931 scratch
= base_plus
;
10936 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10938 /* Be conservative and copy OUTVAL into the scratch now,
10939 this should only be necessary if outval is a subreg
10940 of something larger than a word. */
10941 /* XXX Might this clobber base? I can't see how it can,
10942 since scratch is known to overlap with OUTVAL, and
10943 must be wider than a word. */
10944 emit_insn (gen_movhi (scratch_hi
, outval
));
10945 outval
= scratch_hi
;
10949 emit_set_insn (base_plus
, base
);
10952 else if (GET_CODE (base
) == PLUS
)
10954 /* The addend must be CONST_INT, or we would have dealt with it above. */
10955 HOST_WIDE_INT hi
, lo
;
10957 offset
+= INTVAL (XEXP (base
, 1));
10958 base
= XEXP (base
, 0);
10960 /* Rework the address into a legal sequence of insns. */
10961 /* Valid range for lo is -4095 -> 4095 */
10964 : -((-offset
) & 0xfff));
10966 /* Corner case, if lo is the max offset then we would be out of range
10967 once we have added the additional 1 below, so bump the msb into the
10968 pre-loading insn(s). */
10972 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10973 ^ (HOST_WIDE_INT
) 0x80000000)
10974 - (HOST_WIDE_INT
) 0x80000000);
10976 gcc_assert (hi
+ lo
== offset
);
10980 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10982 /* Be careful not to destroy OUTVAL. */
10983 if (reg_overlap_mentioned_p (base_plus
, outval
))
10985 /* Updating base_plus might destroy outval, see if we
10986 can swap the scratch and base_plus. */
10987 if (!reg_overlap_mentioned_p (scratch
, outval
))
10990 scratch
= base_plus
;
10995 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10997 /* Be conservative and copy outval into scratch now,
10998 this should only be necessary if outval is a
10999 subreg of something larger than a word. */
11000 /* XXX Might this clobber base? I can't see how it
11001 can, since scratch is known to overlap with
11003 emit_insn (gen_movhi (scratch_hi
, outval
));
11004 outval
= scratch_hi
;
11008 /* Get the base address; addsi3 knows how to handle constants
11009 that require more than one insn. */
11010 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
11016 if (BYTES_BIG_ENDIAN
)
11018 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
11019 plus_constant (base
, offset
+ 1)),
11020 gen_lowpart (QImode
, outval
)));
11021 emit_insn (gen_lshrsi3 (scratch
,
11022 gen_rtx_SUBREG (SImode
, outval
, 0),
11024 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
11025 gen_lowpart (QImode
, scratch
)));
11029 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
11030 gen_lowpart (QImode
, outval
)));
11031 emit_insn (gen_lshrsi3 (scratch
,
11032 gen_rtx_SUBREG (SImode
, outval
, 0),
11034 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
11035 plus_constant (base
, offset
+ 1)),
11036 gen_lowpart (QImode
, scratch
)));
11040 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11041 (padded to the size of a word) should be passed in a register. */
11044 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
11046 if (TARGET_AAPCS_BASED
)
11047 return must_pass_in_stack_var_size (mode
, type
);
11049 return must_pass_in_stack_var_size_or_pad (mode
, type
);
11053 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11054 Return true if an argument passed on the stack should be padded upwards,
11055 i.e. if the least-significant byte has useful data.
11056 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11057 aggregate types are placed in the lowest memory address. */
11060 arm_pad_arg_upward (enum machine_mode mode
, const_tree type
)
11062 if (!TARGET_AAPCS_BASED
)
11063 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
11065 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
11072 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11073 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11074 byte of the register has useful data, and return the opposite if the
11075 most significant byte does.
11076 For AAPCS, small aggregates and small complex types are always padded
11080 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
,
11081 tree type
, int first ATTRIBUTE_UNUSED
)
11083 if (TARGET_AAPCS_BASED
11084 && BYTES_BIG_ENDIAN
11085 && (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
)
11086 && int_size_in_bytes (type
) <= 4)
11089 /* Otherwise, use default padding. */
11090 return !BYTES_BIG_ENDIAN
;
11094 /* Print a symbolic form of X to the debug file, F. */
11096 arm_print_value (FILE *f
, rtx x
)
11098 switch (GET_CODE (x
))
11101 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
11105 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
11113 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
11115 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
11116 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
11124 fprintf (f
, "\"%s\"", XSTR (x
, 0));
11128 fprintf (f
, "`%s'", XSTR (x
, 0));
11132 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
11136 arm_print_value (f
, XEXP (x
, 0));
11140 arm_print_value (f
, XEXP (x
, 0));
11142 arm_print_value (f
, XEXP (x
, 1));
11150 fprintf (f
, "????");
11155 /* Routines for manipulation of the constant pool. */
11157 /* Arm instructions cannot load a large constant directly into a
11158 register; they have to come from a pc relative load. The constant
11159 must therefore be placed in the addressable range of the pc
11160 relative load. Depending on the precise pc relative load
11161 instruction the range is somewhere between 256 bytes and 4k. This
11162 means that we often have to dump a constant inside a function, and
11163 generate code to branch around it.
11165 It is important to minimize this, since the branches will slow
11166 things down and make the code larger.
11168 Normally we can hide the table after an existing unconditional
11169 branch so that there is no interruption of the flow, but in the
11170 worst case the code looks like this:
11188 We fix this by performing a scan after scheduling, which notices
11189 which instructions need to have their operands fetched from the
11190 constant table and builds the table.
11192 The algorithm starts by building a table of all the constants that
11193 need fixing up and all the natural barriers in the function (places
11194 where a constant table can be dropped without breaking the flow).
11195 For each fixup we note how far the pc-relative replacement will be
11196 able to reach and the offset of the instruction into the function.
11198 Having built the table we then group the fixes together to form
11199 tables that are as large as possible (subject to addressing
11200 constraints) and emit each table of constants after the last
11201 barrier that is within range of all the instructions in the group.
11202 If a group does not contain a barrier, then we forcibly create one
11203 by inserting a jump instruction into the flow. Once the table has
11204 been inserted, the insns are then modified to reference the
11205 relevant entry in the pool.
11207 Possible enhancements to the algorithm (not implemented) are:
11209 1) For some processors and object formats, there may be benefit in
11210 aligning the pools to the start of cache lines; this alignment
11211 would need to be taken into account when calculating addressability
11214 /* These typedefs are located at the start of this file, so that
11215 they can be used in the prototypes there. This comment is to
11216 remind readers of that fact so that the following structures
11217 can be understood more easily.
11219 typedef struct minipool_node Mnode;
11220 typedef struct minipool_fixup Mfix; */
11222 struct minipool_node
11224 /* Doubly linked chain of entries. */
11227 /* The maximum offset into the code that this entry can be placed. While
11228 pushing fixes for forward references, all entries are sorted in order
11229 of increasing max_address. */
11230 HOST_WIDE_INT max_address
;
11231 /* Similarly for an entry inserted for a backwards ref. */
11232 HOST_WIDE_INT min_address
;
11233 /* The number of fixes referencing this entry. This can become zero
11234 if we "unpush" an entry. In this case we ignore the entry when we
11235 come to emit the code. */
11237 /* The offset from the start of the minipool. */
11238 HOST_WIDE_INT offset
;
11239 /* The value in table. */
11241 /* The mode of value. */
11242 enum machine_mode mode
;
11243 /* The size of the value. With iWMMXt enabled
11244 sizes > 4 also imply an alignment of 8-bytes. */
11248 struct minipool_fixup
11252 HOST_WIDE_INT address
;
11254 enum machine_mode mode
;
11258 HOST_WIDE_INT forwards
;
11259 HOST_WIDE_INT backwards
;
11262 /* Fixes less than a word need padding out to a word boundary. */
11263 #define MINIPOOL_FIX_SIZE(mode) \
11264 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11266 static Mnode
* minipool_vector_head
;
11267 static Mnode
* minipool_vector_tail
;
11268 static rtx minipool_vector_label
;
11269 static int minipool_pad
;
11271 /* The linked list of all minipool fixes required for this function. */
11272 Mfix
* minipool_fix_head
;
11273 Mfix
* minipool_fix_tail
;
11274 /* The fix entry for the current minipool, once it has been placed. */
11275 Mfix
* minipool_barrier
;
11277 /* Determines if INSN is the start of a jump table. Returns the end
11278 of the TABLE or NULL_RTX. */
11280 is_jump_table (rtx insn
)
11284 if (GET_CODE (insn
) == JUMP_INSN
11285 && JUMP_LABEL (insn
) != NULL
11286 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
11287 == next_real_insn (insn
))
11289 && GET_CODE (table
) == JUMP_INSN
11290 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
11291 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
11297 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11298 #define JUMP_TABLES_IN_TEXT_SECTION 0
11301 static HOST_WIDE_INT
11302 get_jump_table_size (rtx insn
)
11304 /* ADDR_VECs only take room if read-only data does into the text
11306 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
11308 rtx body
= PATTERN (insn
);
11309 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
11310 HOST_WIDE_INT size
;
11311 HOST_WIDE_INT modesize
;
11313 modesize
= GET_MODE_SIZE (GET_MODE (body
));
11314 size
= modesize
* XVECLEN (body
, elt
);
11318 /* Round up size of TBB table to a halfword boundary. */
11319 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
11322 /* No padding necessary for TBH. */
11325 /* Add two bytes for alignment on Thumb. */
11330 gcc_unreachable ();
11338 /* Move a minipool fix MP from its current location to before MAX_MP.
11339 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11340 constraints may need updating. */
11342 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
11343 HOST_WIDE_INT max_address
)
11345 /* The code below assumes these are different. */
11346 gcc_assert (mp
!= max_mp
);
11348 if (max_mp
== NULL
)
11350 if (max_address
< mp
->max_address
)
11351 mp
->max_address
= max_address
;
11355 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11356 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11358 mp
->max_address
= max_address
;
11360 /* Unlink MP from its current position. Since max_mp is non-null,
11361 mp->prev must be non-null. */
11362 mp
->prev
->next
= mp
->next
;
11363 if (mp
->next
!= NULL
)
11364 mp
->next
->prev
= mp
->prev
;
11366 minipool_vector_tail
= mp
->prev
;
11368 /* Re-insert it before MAX_MP. */
11370 mp
->prev
= max_mp
->prev
;
11373 if (mp
->prev
!= NULL
)
11374 mp
->prev
->next
= mp
;
11376 minipool_vector_head
= mp
;
11379 /* Save the new entry. */
11382 /* Scan over the preceding entries and adjust their addresses as
11384 while (mp
->prev
!= NULL
11385 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11387 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
11394 /* Add a constant to the minipool for a forward reference. Returns the
11395 node added or NULL if the constant will not fit in this pool. */
11397 add_minipool_forward_ref (Mfix
*fix
)
11399 /* If set, max_mp is the first pool_entry that has a lower
11400 constraint than the one we are trying to add. */
11401 Mnode
* max_mp
= NULL
;
11402 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
11405 /* If the minipool starts before the end of FIX->INSN then this FIX
11406 can not be placed into the current pool. Furthermore, adding the
11407 new constant pool entry may cause the pool to start FIX_SIZE bytes
11409 if (minipool_vector_head
&&
11410 (fix
->address
+ get_attr_length (fix
->insn
)
11411 >= minipool_vector_head
->max_address
- fix
->fix_size
))
11414 /* Scan the pool to see if a constant with the same value has
11415 already been added. While we are doing this, also note the
11416 location where we must insert the constant if it doesn't already
11418 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11420 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11421 && fix
->mode
== mp
->mode
11422 && (GET_CODE (fix
->value
) != CODE_LABEL
11423 || (CODE_LABEL_NUMBER (fix
->value
)
11424 == CODE_LABEL_NUMBER (mp
->value
)))
11425 && rtx_equal_p (fix
->value
, mp
->value
))
11427 /* More than one fix references this entry. */
11429 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
11432 /* Note the insertion point if necessary. */
11434 && mp
->max_address
> max_address
)
11437 /* If we are inserting an 8-bytes aligned quantity and
11438 we have not already found an insertion point, then
11439 make sure that all such 8-byte aligned quantities are
11440 placed at the start of the pool. */
11441 if (ARM_DOUBLEWORD_ALIGN
11443 && fix
->fix_size
>= 8
11444 && mp
->fix_size
< 8)
11447 max_address
= mp
->max_address
;
11451 /* The value is not currently in the minipool, so we need to create
11452 a new entry for it. If MAX_MP is NULL, the entry will be put on
11453 the end of the list since the placement is less constrained than
11454 any existing entry. Otherwise, we insert the new fix before
11455 MAX_MP and, if necessary, adjust the constraints on the other
11458 mp
->fix_size
= fix
->fix_size
;
11459 mp
->mode
= fix
->mode
;
11460 mp
->value
= fix
->value
;
11462 /* Not yet required for a backwards ref. */
11463 mp
->min_address
= -65536;
11465 if (max_mp
== NULL
)
11467 mp
->max_address
= max_address
;
11469 mp
->prev
= minipool_vector_tail
;
11471 if (mp
->prev
== NULL
)
11473 minipool_vector_head
= mp
;
11474 minipool_vector_label
= gen_label_rtx ();
11477 mp
->prev
->next
= mp
;
11479 minipool_vector_tail
= mp
;
11483 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11484 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11486 mp
->max_address
= max_address
;
11489 mp
->prev
= max_mp
->prev
;
11491 if (mp
->prev
!= NULL
)
11492 mp
->prev
->next
= mp
;
11494 minipool_vector_head
= mp
;
11497 /* Save the new entry. */
11500 /* Scan over the preceding entries and adjust their addresses as
11502 while (mp
->prev
!= NULL
11503 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11505 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
11513 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
11514 HOST_WIDE_INT min_address
)
11516 HOST_WIDE_INT offset
;
11518 /* The code below assumes these are different. */
11519 gcc_assert (mp
!= min_mp
);
11521 if (min_mp
== NULL
)
11523 if (min_address
> mp
->min_address
)
11524 mp
->min_address
= min_address
;
11528 /* We will adjust this below if it is too loose. */
11529 mp
->min_address
= min_address
;
11531 /* Unlink MP from its current position. Since min_mp is non-null,
11532 mp->next must be non-null. */
11533 mp
->next
->prev
= mp
->prev
;
11534 if (mp
->prev
!= NULL
)
11535 mp
->prev
->next
= mp
->next
;
11537 minipool_vector_head
= mp
->next
;
11539 /* Reinsert it after MIN_MP. */
11541 mp
->next
= min_mp
->next
;
11543 if (mp
->next
!= NULL
)
11544 mp
->next
->prev
= mp
;
11546 minipool_vector_tail
= mp
;
11552 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11554 mp
->offset
= offset
;
11555 if (mp
->refcount
> 0)
11556 offset
+= mp
->fix_size
;
11558 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11559 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11565 /* Add a constant to the minipool for a backward reference. Returns the
11566 node added or NULL if the constant will not fit in this pool.
11568 Note that the code for insertion for a backwards reference can be
11569 somewhat confusing because the calculated offsets for each fix do
11570 not take into account the size of the pool (which is still under
11573 add_minipool_backward_ref (Mfix
*fix
)
11575 /* If set, min_mp is the last pool_entry that has a lower constraint
11576 than the one we are trying to add. */
11577 Mnode
*min_mp
= NULL
;
11578 /* This can be negative, since it is only a constraint. */
11579 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
11582 /* If we can't reach the current pool from this insn, or if we can't
11583 insert this entry at the end of the pool without pushing other
11584 fixes out of range, then we don't try. This ensures that we
11585 can't fail later on. */
11586 if (min_address
>= minipool_barrier
->address
11587 || (minipool_vector_tail
->min_address
+ fix
->fix_size
11588 >= minipool_barrier
->address
))
11591 /* Scan the pool to see if a constant with the same value has
11592 already been added. While we are doing this, also note the
11593 location where we must insert the constant if it doesn't already
11595 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
11597 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11598 && fix
->mode
== mp
->mode
11599 && (GET_CODE (fix
->value
) != CODE_LABEL
11600 || (CODE_LABEL_NUMBER (fix
->value
)
11601 == CODE_LABEL_NUMBER (mp
->value
)))
11602 && rtx_equal_p (fix
->value
, mp
->value
)
11603 /* Check that there is enough slack to move this entry to the
11604 end of the table (this is conservative). */
11605 && (mp
->max_address
11606 > (minipool_barrier
->address
11607 + minipool_vector_tail
->offset
11608 + minipool_vector_tail
->fix_size
)))
11611 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
11614 if (min_mp
!= NULL
)
11615 mp
->min_address
+= fix
->fix_size
;
11618 /* Note the insertion point if necessary. */
11619 if (mp
->min_address
< min_address
)
11621 /* For now, we do not allow the insertion of 8-byte alignment
11622 requiring nodes anywhere but at the start of the pool. */
11623 if (ARM_DOUBLEWORD_ALIGN
11624 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
11629 else if (mp
->max_address
11630 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
11632 /* Inserting before this entry would push the fix beyond
11633 its maximum address (which can happen if we have
11634 re-located a forwards fix); force the new fix to come
11636 if (ARM_DOUBLEWORD_ALIGN
11637 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
11642 min_address
= mp
->min_address
+ fix
->fix_size
;
11645 /* Do not insert a non-8-byte aligned quantity before 8-byte
11646 aligned quantities. */
11647 else if (ARM_DOUBLEWORD_ALIGN
11648 && fix
->fix_size
< 8
11649 && mp
->fix_size
>= 8)
11652 min_address
= mp
->min_address
+ fix
->fix_size
;
11657 /* We need to create a new entry. */
11659 mp
->fix_size
= fix
->fix_size
;
11660 mp
->mode
= fix
->mode
;
11661 mp
->value
= fix
->value
;
11663 mp
->max_address
= minipool_barrier
->address
+ 65536;
11665 mp
->min_address
= min_address
;
11667 if (min_mp
== NULL
)
11670 mp
->next
= minipool_vector_head
;
11672 if (mp
->next
== NULL
)
11674 minipool_vector_tail
= mp
;
11675 minipool_vector_label
= gen_label_rtx ();
11678 mp
->next
->prev
= mp
;
11680 minipool_vector_head
= mp
;
11684 mp
->next
= min_mp
->next
;
11688 if (mp
->next
!= NULL
)
11689 mp
->next
->prev
= mp
;
11691 minipool_vector_tail
= mp
;
11694 /* Save the new entry. */
11702 /* Scan over the following entries and adjust their offsets. */
11703 while (mp
->next
!= NULL
)
11705 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11706 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11709 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
11711 mp
->next
->offset
= mp
->offset
;
11720 assign_minipool_offsets (Mfix
*barrier
)
11722 HOST_WIDE_INT offset
= 0;
11725 minipool_barrier
= barrier
;
11727 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11729 mp
->offset
= offset
;
11731 if (mp
->refcount
> 0)
11732 offset
+= mp
->fix_size
;
11736 /* Output the literal table */
11738 dump_minipool (rtx scan
)
11744 if (ARM_DOUBLEWORD_ALIGN
)
11745 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11746 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
11753 fprintf (dump_file
,
11754 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11755 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
11757 scan
= emit_label_after (gen_label_rtx (), scan
);
11758 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
11759 scan
= emit_label_after (minipool_vector_label
, scan
);
11761 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
11763 if (mp
->refcount
> 0)
11767 fprintf (dump_file
,
11768 ";; Offset %u, min %ld, max %ld ",
11769 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
11770 (unsigned long) mp
->max_address
);
11771 arm_print_value (dump_file
, mp
->value
);
11772 fputc ('\n', dump_file
);
11775 switch (mp
->fix_size
)
11777 #ifdef HAVE_consttable_1
11779 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
11783 #ifdef HAVE_consttable_2
11785 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
11789 #ifdef HAVE_consttable_4
11791 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
11795 #ifdef HAVE_consttable_8
11797 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
11801 #ifdef HAVE_consttable_16
11803 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
11808 gcc_unreachable ();
11816 minipool_vector_head
= minipool_vector_tail
= NULL
;
11817 scan
= emit_insn_after (gen_consttable_end (), scan
);
11818 scan
= emit_barrier_after (scan
);
11821 /* Return the cost of forcibly inserting a barrier after INSN. */
11823 arm_barrier_cost (rtx insn
)
11825 /* Basing the location of the pool on the loop depth is preferable,
11826 but at the moment, the basic block information seems to be
11827 corrupt by this stage of the compilation. */
11828 int base_cost
= 50;
11829 rtx next
= next_nonnote_insn (insn
);
11831 if (next
!= NULL
&& GET_CODE (next
) == CODE_LABEL
)
11834 switch (GET_CODE (insn
))
11837 /* It will always be better to place the table before the label, rather
11846 return base_cost
- 10;
11849 return base_cost
+ 10;
11853 /* Find the best place in the insn stream in the range
11854 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11855 Create the barrier by inserting a jump and add a new fix entry for
11858 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
11860 HOST_WIDE_INT count
= 0;
11862 rtx from
= fix
->insn
;
11863 /* The instruction after which we will insert the jump. */
11864 rtx selected
= NULL
;
11866 /* The address at which the jump instruction will be placed. */
11867 HOST_WIDE_INT selected_address
;
11869 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
11870 rtx label
= gen_label_rtx ();
11872 selected_cost
= arm_barrier_cost (from
);
11873 selected_address
= fix
->address
;
11875 while (from
&& count
< max_count
)
11880 /* This code shouldn't have been called if there was a natural barrier
11882 gcc_assert (GET_CODE (from
) != BARRIER
);
11884 /* Count the length of this insn. */
11885 count
+= get_attr_length (from
);
11887 /* If there is a jump table, add its length. */
11888 tmp
= is_jump_table (from
);
11891 count
+= get_jump_table_size (tmp
);
11893 /* Jump tables aren't in a basic block, so base the cost on
11894 the dispatch insn. If we select this location, we will
11895 still put the pool after the table. */
11896 new_cost
= arm_barrier_cost (from
);
11898 if (count
< max_count
11899 && (!selected
|| new_cost
<= selected_cost
))
11902 selected_cost
= new_cost
;
11903 selected_address
= fix
->address
+ count
;
11906 /* Continue after the dispatch table. */
11907 from
= NEXT_INSN (tmp
);
11911 new_cost
= arm_barrier_cost (from
);
11913 if (count
< max_count
11914 && (!selected
|| new_cost
<= selected_cost
))
11917 selected_cost
= new_cost
;
11918 selected_address
= fix
->address
+ count
;
11921 from
= NEXT_INSN (from
);
11924 /* Make sure that we found a place to insert the jump. */
11925 gcc_assert (selected
);
11927 /* Make sure we do not split a call and its corresponding
11928 CALL_ARG_LOCATION note. */
11929 if (CALL_P (selected
))
11931 rtx next
= NEXT_INSN (selected
);
11932 if (next
&& NOTE_P (next
)
11933 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
11937 /* Create a new JUMP_INSN that branches around a barrier. */
11938 from
= emit_jump_insn_after (gen_jump (label
), selected
);
11939 JUMP_LABEL (from
) = label
;
11940 barrier
= emit_barrier_after (from
);
11941 emit_label_after (label
, barrier
);
11943 /* Create a minipool barrier entry for the new barrier. */
11944 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
11945 new_fix
->insn
= barrier
;
11946 new_fix
->address
= selected_address
;
11947 new_fix
->next
= fix
->next
;
11948 fix
->next
= new_fix
;
11953 /* Record that there is a natural barrier in the insn stream at
11956 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
11958 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
11961 fix
->address
= address
;
11964 if (minipool_fix_head
!= NULL
)
11965 minipool_fix_tail
->next
= fix
;
11967 minipool_fix_head
= fix
;
11969 minipool_fix_tail
= fix
;
11972 /* Record INSN, which will need fixing up to load a value from the
11973 minipool. ADDRESS is the offset of the insn since the start of the
11974 function; LOC is a pointer to the part of the insn which requires
11975 fixing; VALUE is the constant that must be loaded, which is of type
11978 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
11979 enum machine_mode mode
, rtx value
)
11981 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
11984 fix
->address
= address
;
11987 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
11988 fix
->value
= value
;
11989 fix
->forwards
= get_attr_pool_range (insn
);
11990 fix
->backwards
= get_attr_neg_pool_range (insn
);
11991 fix
->minipool
= NULL
;
11993 /* If an insn doesn't have a range defined for it, then it isn't
11994 expecting to be reworked by this code. Better to stop now than
11995 to generate duff assembly code. */
11996 gcc_assert (fix
->forwards
|| fix
->backwards
);
11998 /* If an entry requires 8-byte alignment then assume all constant pools
11999 require 4 bytes of padding. Trying to do this later on a per-pool
12000 basis is awkward because existing pool entries have to be modified. */
12001 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
12006 fprintf (dump_file
,
12007 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12008 GET_MODE_NAME (mode
),
12009 INSN_UID (insn
), (unsigned long) address
,
12010 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
12011 arm_print_value (dump_file
, fix
->value
);
12012 fprintf (dump_file
, "\n");
12015 /* Add it to the chain of fixes. */
12018 if (minipool_fix_head
!= NULL
)
12019 minipool_fix_tail
->next
= fix
;
12021 minipool_fix_head
= fix
;
12023 minipool_fix_tail
= fix
;
12026 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12027 Returns the number of insns needed, or 99 if we don't know how to
12030 arm_const_double_inline_cost (rtx val
)
12032 rtx lowpart
, highpart
;
12033 enum machine_mode mode
;
12035 mode
= GET_MODE (val
);
12037 if (mode
== VOIDmode
)
12040 gcc_assert (GET_MODE_SIZE (mode
) == 8);
12042 lowpart
= gen_lowpart (SImode
, val
);
12043 highpart
= gen_highpart_mode (SImode
, mode
, val
);
12045 gcc_assert (GET_CODE (lowpart
) == CONST_INT
);
12046 gcc_assert (GET_CODE (highpart
) == CONST_INT
);
12048 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
12049 NULL_RTX
, NULL_RTX
, 0, 0)
12050 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
12051 NULL_RTX
, NULL_RTX
, 0, 0));
12054 /* Return true if it is worthwhile to split a 64-bit constant into two
12055 32-bit operations. This is the case if optimizing for size, or
12056 if we have load delay slots, or if one 32-bit part can be done with
12057 a single data operation. */
12059 arm_const_double_by_parts (rtx val
)
12061 enum machine_mode mode
= GET_MODE (val
);
12064 if (optimize_size
|| arm_ld_sched
)
12067 if (mode
== VOIDmode
)
12070 part
= gen_highpart_mode (SImode
, mode
, val
);
12072 gcc_assert (GET_CODE (part
) == CONST_INT
);
12074 if (const_ok_for_arm (INTVAL (part
))
12075 || const_ok_for_arm (~INTVAL (part
)))
12078 part
= gen_lowpart (SImode
, val
);
12080 gcc_assert (GET_CODE (part
) == CONST_INT
);
12082 if (const_ok_for_arm (INTVAL (part
))
12083 || const_ok_for_arm (~INTVAL (part
)))
12089 /* Return true if it is possible to inline both the high and low parts
12090 of a 64-bit constant into 32-bit data processing instructions. */
12092 arm_const_double_by_immediates (rtx val
)
12094 enum machine_mode mode
= GET_MODE (val
);
12097 if (mode
== VOIDmode
)
12100 part
= gen_highpart_mode (SImode
, mode
, val
);
12102 gcc_assert (GET_CODE (part
) == CONST_INT
);
12104 if (!const_ok_for_arm (INTVAL (part
)))
12107 part
= gen_lowpart (SImode
, val
);
12109 gcc_assert (GET_CODE (part
) == CONST_INT
);
12111 if (!const_ok_for_arm (INTVAL (part
)))
12117 /* Scan INSN and note any of its operands that need fixing.
12118 If DO_PUSHES is false we do not actually push any of the fixups
12119 needed. The function returns TRUE if any fixups were needed/pushed.
12120 This is used by arm_memory_load_p() which needs to know about loads
12121 of constants that will be converted into minipool loads. */
12123 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
12125 bool result
= false;
12128 extract_insn (insn
);
12130 if (!constrain_operands (1))
12131 fatal_insn_not_found (insn
);
12133 if (recog_data
.n_alternatives
== 0)
12136 /* Fill in recog_op_alt with information about the constraints of
12138 preprocess_constraints ();
12140 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12142 /* Things we need to fix can only occur in inputs. */
12143 if (recog_data
.operand_type
[opno
] != OP_IN
)
12146 /* If this alternative is a memory reference, then any mention
12147 of constants in this alternative is really to fool reload
12148 into allowing us to accept one there. We need to fix them up
12149 now so that we output the right code. */
12150 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
12152 rtx op
= recog_data
.operand
[opno
];
12154 if (CONSTANT_P (op
))
12157 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
12158 recog_data
.operand_mode
[opno
], op
);
12161 else if (GET_CODE (op
) == MEM
12162 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
12163 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
12167 rtx cop
= avoid_constant_pool_reference (op
);
12169 /* Casting the address of something to a mode narrower
12170 than a word can cause avoid_constant_pool_reference()
12171 to return the pool reference itself. That's no good to
12172 us here. Lets just hope that we can use the
12173 constant pool value directly. */
12175 cop
= get_pool_constant (XEXP (op
, 0));
12177 push_minipool_fix (insn
, address
,
12178 recog_data
.operand_loc
[opno
],
12179 recog_data
.operand_mode
[opno
], cop
);
12190 /* Convert instructions to their cc-clobbering variant if possible, since
12191 that allows us to use smaller encodings. */
12194 thumb2_reorg (void)
12199 INIT_REG_SET (&live
);
12201 /* We are freeing block_for_insn in the toplev to keep compatibility
12202 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12203 compute_bb_for_insn ();
12210 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
12211 df_simulate_initialize_backwards (bb
, &live
);
12212 FOR_BB_INSNS_REVERSE (bb
, insn
)
12214 if (NONJUMP_INSN_P (insn
)
12215 && !REGNO_REG_SET_P (&live
, CC_REGNUM
))
12217 rtx pat
= PATTERN (insn
);
12218 if (GET_CODE (pat
) == SET
12219 && low_register_operand (XEXP (pat
, 0), SImode
)
12220 && thumb_16bit_operator (XEXP (pat
, 1), SImode
)
12221 && low_register_operand (XEXP (XEXP (pat
, 1), 0), SImode
)
12222 && low_register_operand (XEXP (XEXP (pat
, 1), 1), SImode
))
12224 rtx dst
= XEXP (pat
, 0);
12225 rtx src
= XEXP (pat
, 1);
12226 rtx op0
= XEXP (src
, 0);
12227 rtx op1
= (GET_RTX_CLASS (GET_CODE (src
)) == RTX_COMM_ARITH
12228 ? XEXP (src
, 1) : NULL
);
12230 if (rtx_equal_p (dst
, op0
)
12231 || GET_CODE (src
) == PLUS
|| GET_CODE (src
) == MINUS
)
12233 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12234 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
12235 rtvec vec
= gen_rtvec (2, pat
, clobber
);
12237 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
12238 INSN_CODE (insn
) = -1;
12240 /* We can also handle a commutative operation where the
12241 second operand matches the destination. */
12242 else if (op1
&& rtx_equal_p (dst
, op1
))
12244 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12245 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
12248 src
= copy_rtx (src
);
12249 XEXP (src
, 0) = op1
;
12250 XEXP (src
, 1) = op0
;
12251 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
12252 vec
= gen_rtvec (2, pat
, clobber
);
12253 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
12254 INSN_CODE (insn
) = -1;
12259 if (NONDEBUG_INSN_P (insn
))
12260 df_simulate_one_insn_backwards (bb
, insn
, &live
);
12264 CLEAR_REG_SET (&live
);
12267 /* Gcc puts the pool in the wrong place for ARM, since we can only
12268 load addresses a limited distance around the pc. We do some
12269 special munging to move the constant pool values to the correct
12270 point in the code. */
12275 HOST_WIDE_INT address
= 0;
12281 minipool_fix_head
= minipool_fix_tail
= NULL
;
12283 /* The first insn must always be a note, or the code below won't
12284 scan it properly. */
12285 insn
= get_insns ();
12286 gcc_assert (GET_CODE (insn
) == NOTE
);
12289 /* Scan all the insns and record the operands that will need fixing. */
12290 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
12292 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12293 && (arm_cirrus_insn_p (insn
)
12294 || GET_CODE (insn
) == JUMP_INSN
12295 || arm_memory_load_p (insn
)))
12296 cirrus_reorg (insn
);
12298 if (GET_CODE (insn
) == BARRIER
)
12299 push_minipool_barrier (insn
, address
);
12300 else if (INSN_P (insn
))
12304 note_invalid_constants (insn
, address
, true);
12305 address
+= get_attr_length (insn
);
12307 /* If the insn is a vector jump, add the size of the table
12308 and skip the table. */
12309 if ((table
= is_jump_table (insn
)) != NULL
)
12311 address
+= get_jump_table_size (table
);
12317 fix
= minipool_fix_head
;
12319 /* Now scan the fixups and perform the required changes. */
12324 Mfix
* last_added_fix
;
12325 Mfix
* last_barrier
= NULL
;
12328 /* Skip any further barriers before the next fix. */
12329 while (fix
&& GET_CODE (fix
->insn
) == BARRIER
)
12332 /* No more fixes. */
12336 last_added_fix
= NULL
;
12338 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
12340 if (GET_CODE (ftmp
->insn
) == BARRIER
)
12342 if (ftmp
->address
>= minipool_vector_head
->max_address
)
12345 last_barrier
= ftmp
;
12347 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
12350 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
12353 /* If we found a barrier, drop back to that; any fixes that we
12354 could have reached but come after the barrier will now go in
12355 the next mini-pool. */
12356 if (last_barrier
!= NULL
)
12358 /* Reduce the refcount for those fixes that won't go into this
12360 for (fdel
= last_barrier
->next
;
12361 fdel
&& fdel
!= ftmp
;
12364 fdel
->minipool
->refcount
--;
12365 fdel
->minipool
= NULL
;
12368 ftmp
= last_barrier
;
12372 /* ftmp is first fix that we can't fit into this pool and
12373 there no natural barriers that we could use. Insert a
12374 new barrier in the code somewhere between the previous
12375 fix and this one, and arrange to jump around it. */
12376 HOST_WIDE_INT max_address
;
12378 /* The last item on the list of fixes must be a barrier, so
12379 we can never run off the end of the list of fixes without
12380 last_barrier being set. */
12383 max_address
= minipool_vector_head
->max_address
;
12384 /* Check that there isn't another fix that is in range that
12385 we couldn't fit into this pool because the pool was
12386 already too large: we need to put the pool before such an
12387 instruction. The pool itself may come just after the
12388 fix because create_fix_barrier also allows space for a
12389 jump instruction. */
12390 if (ftmp
->address
< max_address
)
12391 max_address
= ftmp
->address
+ 1;
12393 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
12396 assign_minipool_offsets (last_barrier
);
12400 if (GET_CODE (ftmp
->insn
) != BARRIER
12401 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
12408 /* Scan over the fixes we have identified for this pool, fixing them
12409 up and adding the constants to the pool itself. */
12410 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
12411 this_fix
= this_fix
->next
)
12412 if (GET_CODE (this_fix
->insn
) != BARRIER
)
12415 = plus_constant (gen_rtx_LABEL_REF (VOIDmode
,
12416 minipool_vector_label
),
12417 this_fix
->minipool
->offset
);
12418 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
12421 dump_minipool (last_barrier
->insn
);
12425 /* From now on we must synthesize any constants that we can't handle
12426 directly. This can happen if the RTL gets split during final
12427 instruction generation. */
12428 after_arm_reorg
= 1;
12430 /* Free the minipool memory. */
12431 obstack_free (&minipool_obstack
, minipool_startobj
);
12434 /* Routines to output assembly language. */
12436 /* If the rtx is the correct value then return the string of the number.
12437 In this way we can ensure that valid double constants are generated even
12438 when cross compiling. */
12440 fp_immediate_constant (rtx x
)
12445 if (!fp_consts_inited
)
12448 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12449 for (i
= 0; i
< 8; i
++)
12450 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
12451 return strings_fp
[i
];
12453 gcc_unreachable ();
12456 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12457 static const char *
12458 fp_const_from_val (REAL_VALUE_TYPE
*r
)
12462 if (!fp_consts_inited
)
12465 for (i
= 0; i
< 8; i
++)
12466 if (REAL_VALUES_EQUAL (*r
, values_fp
[i
]))
12467 return strings_fp
[i
];
12469 gcc_unreachable ();
12472 /* Output the operands of a LDM/STM instruction to STREAM.
12473 MASK is the ARM register set mask of which only bits 0-15 are important.
12474 REG is the base register, either the frame pointer or the stack pointer,
12475 INSTR is the possibly suffixed load or store instruction.
12476 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12479 print_multi_reg (FILE *stream
, const char *instr
, unsigned reg
,
12480 unsigned long mask
, int rfe
)
12483 bool not_first
= FALSE
;
12485 gcc_assert (!rfe
|| (mask
& (1 << PC_REGNUM
)));
12486 fputc ('\t', stream
);
12487 asm_fprintf (stream
, instr
, reg
);
12488 fputc ('{', stream
);
12490 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
12491 if (mask
& (1 << i
))
12494 fprintf (stream
, ", ");
12496 asm_fprintf (stream
, "%r", i
);
12501 fprintf (stream
, "}^\n");
12503 fprintf (stream
, "}\n");
12507 /* Output a FLDMD instruction to STREAM.
12508 BASE if the register containing the address.
12509 REG and COUNT specify the register range.
12510 Extra registers may be added to avoid hardware bugs.
12512 We output FLDMD even for ARMv5 VFP implementations. Although
12513 FLDMD is technically not supported until ARMv6, it is believed
12514 that all VFP implementations support its use in this context. */
12517 vfp_output_fldmd (FILE * stream
, unsigned int base
, int reg
, int count
)
12521 /* Workaround ARM10 VFPr1 bug. */
12522 if (count
== 2 && !arm_arch6
)
12529 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12530 load into multiple parts if we have to handle more than 16 registers. */
12533 vfp_output_fldmd (stream
, base
, reg
, 16);
12534 vfp_output_fldmd (stream
, base
, reg
+ 16, count
- 16);
12538 fputc ('\t', stream
);
12539 asm_fprintf (stream
, "fldmfdd\t%r!, {", base
);
12541 for (i
= reg
; i
< reg
+ count
; i
++)
12544 fputs (", ", stream
);
12545 asm_fprintf (stream
, "d%d", i
);
12547 fputs ("}\n", stream
);
12552 /* Output the assembly for a store multiple. */
12555 vfp_output_fstmd (rtx
* operands
)
12562 strcpy (pattern
, "fstmfdd\t%m0!, {%P1");
12563 p
= strlen (pattern
);
12565 gcc_assert (GET_CODE (operands
[1]) == REG
);
12567 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
12568 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
12570 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
12572 strcpy (&pattern
[p
], "}");
12574 output_asm_insn (pattern
, operands
);
12579 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12580 number of bytes pushed. */
12583 vfp_emit_fstmd (int base_reg
, int count
)
12590 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12591 register pairs are stored by a store multiple insn. We avoid this
12592 by pushing an extra pair. */
12593 if (count
== 2 && !arm_arch6
)
12595 if (base_reg
== LAST_VFP_REGNUM
- 3)
12600 /* FSTMD may not store more than 16 doubleword registers at once. Split
12601 larger stores into multiple parts (up to a maximum of two, in
12606 /* NOTE: base_reg is an internal register number, so each D register
12608 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
12609 saved
+= vfp_emit_fstmd (base_reg
, 16);
12613 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
12614 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
12616 reg
= gen_rtx_REG (DFmode
, base_reg
);
12619 XVECEXP (par
, 0, 0)
12620 = gen_rtx_SET (VOIDmode
,
12623 gen_rtx_PRE_MODIFY (Pmode
,
12626 (stack_pointer_rtx
,
12629 gen_rtx_UNSPEC (BLKmode
,
12630 gen_rtvec (1, reg
),
12631 UNSPEC_PUSH_MULT
));
12633 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12634 plus_constant (stack_pointer_rtx
, -(count
* 8)));
12635 RTX_FRAME_RELATED_P (tmp
) = 1;
12636 XVECEXP (dwarf
, 0, 0) = tmp
;
12638 tmp
= gen_rtx_SET (VOIDmode
,
12639 gen_frame_mem (DFmode
, stack_pointer_rtx
),
12641 RTX_FRAME_RELATED_P (tmp
) = 1;
12642 XVECEXP (dwarf
, 0, 1) = tmp
;
12644 for (i
= 1; i
< count
; i
++)
12646 reg
= gen_rtx_REG (DFmode
, base_reg
);
12648 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
12650 tmp
= gen_rtx_SET (VOIDmode
,
12651 gen_frame_mem (DFmode
,
12652 plus_constant (stack_pointer_rtx
,
12655 RTX_FRAME_RELATED_P (tmp
) = 1;
12656 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
12659 par
= emit_insn (par
);
12660 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
12661 RTX_FRAME_RELATED_P (par
) = 1;
12666 /* Emit a call instruction with pattern PAT. ADDR is the address of
12667 the call target. */
12670 arm_emit_call_insn (rtx pat
, rtx addr
)
12674 insn
= emit_call_insn (pat
);
12676 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12677 If the call might use such an entry, add a use of the PIC register
12678 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12679 if (TARGET_VXWORKS_RTP
12681 && GET_CODE (addr
) == SYMBOL_REF
12682 && (SYMBOL_REF_DECL (addr
)
12683 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
12684 : !SYMBOL_REF_LOCAL_P (addr
)))
12686 require_pic_register ();
12687 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
12691 /* Output a 'call' insn. */
12693 output_call (rtx
*operands
)
12695 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
12697 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12698 if (REGNO (operands
[0]) == LR_REGNUM
)
12700 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
12701 output_asm_insn ("mov%?\t%0, %|lr", operands
);
12704 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12706 if (TARGET_INTERWORK
|| arm_arch4t
)
12707 output_asm_insn ("bx%?\t%0", operands
);
12709 output_asm_insn ("mov%?\t%|pc, %0", operands
);
12714 /* Output a 'call' insn that is a reference in memory. This is
12715 disabled for ARMv5 and we prefer a blx instead because otherwise
12716 there's a significant performance overhead. */
12718 output_call_mem (rtx
*operands
)
12720 gcc_assert (!arm_arch5
);
12721 if (TARGET_INTERWORK
)
12723 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
12724 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12725 output_asm_insn ("bx%?\t%|ip", operands
);
12727 else if (regno_use_in (LR_REGNUM
, operands
[0]))
12729 /* LR is used in the memory address. We load the address in the
12730 first instruction. It's safe to use IP as the target of the
12731 load since the call will kill it anyway. */
12732 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
12733 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12735 output_asm_insn ("bx%?\t%|ip", operands
);
12737 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
12741 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12742 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
12749 /* Output a move from arm registers to an fpa registers.
12750 OPERANDS[0] is an fpa register.
12751 OPERANDS[1] is the first registers of an arm register pair. */
12753 output_mov_long_double_fpa_from_arm (rtx
*operands
)
12755 int arm_reg0
= REGNO (operands
[1]);
12758 gcc_assert (arm_reg0
!= IP_REGNUM
);
12760 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12761 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12762 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
12764 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
12765 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands
);
12770 /* Output a move from an fpa register to arm registers.
12771 OPERANDS[0] is the first registers of an arm register pair.
12772 OPERANDS[1] is an fpa register. */
12774 output_mov_long_double_arm_from_fpa (rtx
*operands
)
12776 int arm_reg0
= REGNO (operands
[0]);
12779 gcc_assert (arm_reg0
!= IP_REGNUM
);
12781 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12782 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12783 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
12785 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands
);
12786 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
12790 /* Output a move from arm registers to arm registers of a long double
12791 OPERANDS[0] is the destination.
12792 OPERANDS[1] is the source. */
12794 output_mov_long_double_arm_from_arm (rtx
*operands
)
12796 /* We have to be careful here because the two might overlap. */
12797 int dest_start
= REGNO (operands
[0]);
12798 int src_start
= REGNO (operands
[1]);
12802 if (dest_start
< src_start
)
12804 for (i
= 0; i
< 3; i
++)
12806 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
12807 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
12808 output_asm_insn ("mov%?\t%0, %1", ops
);
12813 for (i
= 2; i
>= 0; i
--)
12815 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
12816 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
12817 output_asm_insn ("mov%?\t%0, %1", ops
);
12825 arm_emit_movpair (rtx dest
, rtx src
)
12827 /* If the src is an immediate, simplify it. */
12828 if (CONST_INT_P (src
))
12830 HOST_WIDE_INT val
= INTVAL (src
);
12831 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
12832 if ((val
>> 16) & 0x0000ffff)
12833 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
12835 GEN_INT ((val
>> 16) & 0x0000ffff));
12838 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
12839 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
12842 /* Output a move from arm registers to an fpa registers.
12843 OPERANDS[0] is an fpa register.
12844 OPERANDS[1] is the first registers of an arm register pair. */
12846 output_mov_double_fpa_from_arm (rtx
*operands
)
12848 int arm_reg0
= REGNO (operands
[1]);
12851 gcc_assert (arm_reg0
!= IP_REGNUM
);
12853 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12854 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12855 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops
);
12856 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands
);
12860 /* Output a move from an fpa register to arm registers.
12861 OPERANDS[0] is the first registers of an arm register pair.
12862 OPERANDS[1] is an fpa register. */
12864 output_mov_double_arm_from_fpa (rtx
*operands
)
12866 int arm_reg0
= REGNO (operands
[0]);
12869 gcc_assert (arm_reg0
!= IP_REGNUM
);
12871 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12872 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12873 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands
);
12874 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops
);
12878 /* Output a move between double words. It must be REG<-MEM
12881 output_move_double (rtx
*operands
)
12883 enum rtx_code code0
= GET_CODE (operands
[0]);
12884 enum rtx_code code1
= GET_CODE (operands
[1]);
12889 unsigned int reg0
= REGNO (operands
[0]);
12891 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
12893 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
12895 switch (GET_CODE (XEXP (operands
[1], 0)))
12899 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
12900 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
12902 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
12906 gcc_assert (TARGET_LDRD
);
12907 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
12912 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
12914 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
12919 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
12921 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
12925 gcc_assert (TARGET_LDRD
);
12926 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
12931 /* Autoicrement addressing modes should never have overlapping
12932 base and destination registers, and overlapping index registers
12933 are already prohibited, so this doesn't need to worry about
12935 otherops
[0] = operands
[0];
12936 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
12937 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
12939 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
12941 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
12943 /* Registers overlap so split out the increment. */
12944 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
12945 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
12949 /* Use a single insn if we can.
12950 FIXME: IWMMXT allows offsets larger than ldrd can
12951 handle, fix these up with a pair of ldr. */
12953 || GET_CODE (otherops
[2]) != CONST_INT
12954 || (INTVAL (otherops
[2]) > -256
12955 && INTVAL (otherops
[2]) < 256))
12956 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
12959 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
12960 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12966 /* Use a single insn if we can.
12967 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12968 fix these up with a pair of ldr. */
12970 || GET_CODE (otherops
[2]) != CONST_INT
12971 || (INTVAL (otherops
[2]) > -256
12972 && INTVAL (otherops
[2]) < 256))
12973 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
12976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12977 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
12984 /* We might be able to use ldrd %0, %1 here. However the range is
12985 different to ldr/adr, and it is broken on some ARMv7-M
12986 implementations. */
12987 /* Use the second register of the pair to avoid problematic
12989 otherops
[1] = operands
[1];
12990 output_asm_insn ("adr%?\t%0, %1", otherops
);
12991 operands
[1] = otherops
[0];
12993 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
12995 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
12998 /* ??? This needs checking for thumb2. */
13000 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
13001 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
13003 otherops
[0] = operands
[0];
13004 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
13005 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
13007 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
13009 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13011 switch ((int) INTVAL (otherops
[2]))
13014 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
13019 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
13024 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
13028 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
13029 operands
[1] = otherops
[0];
13031 && (GET_CODE (otherops
[2]) == REG
13033 || (GET_CODE (otherops
[2]) == CONST_INT
13034 && INTVAL (otherops
[2]) > -256
13035 && INTVAL (otherops
[2]) < 256)))
13037 if (reg_overlap_mentioned_p (operands
[0],
13041 /* Swap base and index registers over to
13042 avoid a conflict. */
13044 otherops
[1] = otherops
[2];
13047 /* If both registers conflict, it will usually
13048 have been fixed by a splitter. */
13049 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
13050 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
13052 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13053 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13057 otherops
[0] = operands
[0];
13058 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
13063 if (GET_CODE (otherops
[2]) == CONST_INT
)
13065 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
13066 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
13068 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13071 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13074 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
13077 return "ldr%(d%)\t%0, [%1]";
13079 return "ldm%(ia%)\t%1, %M0";
13083 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
13084 /* Take care of overlapping base/data reg. */
13085 if (reg_mentioned_p (operands
[0], operands
[1]))
13087 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13088 output_asm_insn ("ldr%?\t%0, %1", operands
);
13092 output_asm_insn ("ldr%?\t%0, %1", operands
);
13093 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13100 /* Constraints should ensure this. */
13101 gcc_assert (code0
== MEM
&& code1
== REG
);
13102 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
13104 switch (GET_CODE (XEXP (operands
[0], 0)))
13108 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
13110 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13114 gcc_assert (TARGET_LDRD
);
13115 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
13120 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
13122 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
13127 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
13129 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
13133 gcc_assert (TARGET_LDRD
);
13134 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
13139 otherops
[0] = operands
[1];
13140 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
13141 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
13143 /* IWMMXT allows offsets larger than ldrd can handle,
13144 fix these up with a pair of ldr. */
13146 && GET_CODE (otherops
[2]) == CONST_INT
13147 && (INTVAL(otherops
[2]) <= -256
13148 || INTVAL(otherops
[2]) >= 256))
13150 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13152 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
13153 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
13157 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
13158 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
13161 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13162 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
13164 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
13168 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
13169 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13171 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
13174 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
13180 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
13186 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
13191 && (GET_CODE (otherops
[2]) == REG
13193 || (GET_CODE (otherops
[2]) == CONST_INT
13194 && INTVAL (otherops
[2]) > -256
13195 && INTVAL (otherops
[2]) < 256)))
13197 otherops
[0] = operands
[1];
13198 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
13199 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
13205 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
13206 otherops
[1] = operands
[1];
13207 output_asm_insn ("str%?\t%1, %0", operands
);
13208 output_asm_insn ("str%?\t%H1, %0", otherops
);
13215 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13216 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13219 output_move_quad (rtx
*operands
)
13221 if (REG_P (operands
[0]))
13223 /* Load, or reg->reg move. */
13225 if (MEM_P (operands
[1]))
13227 switch (GET_CODE (XEXP (operands
[1], 0)))
13230 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
13235 output_asm_insn ("adr%?\t%0, %1", operands
);
13236 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
13240 gcc_unreachable ();
13248 gcc_assert (REG_P (operands
[1]));
13250 dest
= REGNO (operands
[0]);
13251 src
= REGNO (operands
[1]);
13253 /* This seems pretty dumb, but hopefully GCC won't try to do it
13256 for (i
= 0; i
< 4; i
++)
13258 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13259 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13260 output_asm_insn ("mov%?\t%0, %1", ops
);
13263 for (i
= 3; i
>= 0; i
--)
13265 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13266 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13267 output_asm_insn ("mov%?\t%0, %1", ops
);
13273 gcc_assert (MEM_P (operands
[0]));
13274 gcc_assert (REG_P (operands
[1]));
13275 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
13277 switch (GET_CODE (XEXP (operands
[0], 0)))
13280 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13284 gcc_unreachable ();
13291 /* Output a VFP load or store instruction. */
13294 output_move_vfp (rtx
*operands
)
13296 rtx reg
, mem
, addr
, ops
[2];
13297 int load
= REG_P (operands
[0]);
13298 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
13299 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
13302 enum machine_mode mode
;
13304 reg
= operands
[!load
];
13305 mem
= operands
[load
];
13307 mode
= GET_MODE (reg
);
13309 gcc_assert (REG_P (reg
));
13310 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
13311 gcc_assert (mode
== SFmode
13315 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
13316 gcc_assert (MEM_P (mem
));
13318 addr
= XEXP (mem
, 0);
13320 switch (GET_CODE (addr
))
13323 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13324 ops
[0] = XEXP (addr
, 0);
13329 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
13330 ops
[0] = XEXP (addr
, 0);
13335 templ
= "f%s%c%%?\t%%%s0, %%1%s";
13341 sprintf (buff
, templ
,
13342 load
? "ld" : "st",
13345 integer_p
? "\t%@ int" : "");
13346 output_asm_insn (buff
, ops
);
13351 /* Output a Neon quad-word load or store, or a load or store for
13352 larger structure modes.
13354 WARNING: The ordering of elements is weird in big-endian mode,
13355 because we use VSTM, as required by the EABI. GCC RTL defines
13356 element ordering based on in-memory order. This can be differ
13357 from the architectural ordering of elements within a NEON register.
13358 The intrinsics defined in arm_neon.h use the NEON register element
13359 ordering, not the GCC RTL element ordering.
13361 For example, the in-memory ordering of a big-endian a quadword
13362 vector with 16-bit elements when stored from register pair {d0,d1}
13363 will be (lowest address first, d0[N] is NEON register element N):
13365 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13367 When necessary, quadword registers (dN, dN+1) are moved to ARM
13368 registers from rN in the order:
13370 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13372 So that STM/LDM can be used on vectors in ARM registers, and the
13373 same memory layout will result as if VSTM/VLDM were used. */
13376 output_move_neon (rtx
*operands
)
13378 rtx reg
, mem
, addr
, ops
[2];
13379 int regno
, load
= REG_P (operands
[0]);
13382 enum machine_mode mode
;
13384 reg
= operands
[!load
];
13385 mem
= operands
[load
];
13387 mode
= GET_MODE (reg
);
13389 gcc_assert (REG_P (reg
));
13390 regno
= REGNO (reg
);
13391 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
13392 || NEON_REGNO_OK_FOR_QUAD (regno
));
13393 gcc_assert (VALID_NEON_DREG_MODE (mode
)
13394 || VALID_NEON_QREG_MODE (mode
)
13395 || VALID_NEON_STRUCT_MODE (mode
));
13396 gcc_assert (MEM_P (mem
));
13398 addr
= XEXP (mem
, 0);
13400 /* Strip off const from addresses like (const (plus (...))). */
13401 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13402 addr
= XEXP (addr
, 0);
13404 switch (GET_CODE (addr
))
13407 templ
= "v%smia%%?\t%%0!, %%h1";
13408 ops
[0] = XEXP (addr
, 0);
13413 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13414 templ
= "v%smdb%%?\t%%0!, %%h1";
13415 ops
[0] = XEXP (addr
, 0);
13420 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13421 gcc_unreachable ();
13426 int nregs
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13429 for (i
= 0; i
< nregs
; i
++)
13431 /* We're only using DImode here because it's a convenient size. */
13432 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
13433 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
13434 if (reg_overlap_mentioned_p (ops
[0], mem
))
13436 gcc_assert (overlap
== -1);
13441 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13442 output_asm_insn (buff
, ops
);
13447 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
13448 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
13449 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13450 output_asm_insn (buff
, ops
);
13457 templ
= "v%smia%%?\t%%m0, %%h1";
13462 sprintf (buff
, templ
, load
? "ld" : "st");
13463 output_asm_insn (buff
, ops
);
13468 /* Compute and return the length of neon_mov<mode>, where <mode> is
13469 one of VSTRUCT modes: EI, OI, CI or XI. */
13471 arm_attr_length_move_neon (rtx insn
)
13473 rtx reg
, mem
, addr
;
13475 enum machine_mode mode
;
13477 extract_insn_cached (insn
);
13479 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
13481 mode
= GET_MODE (recog_data
.operand
[0]);
13492 gcc_unreachable ();
13496 load
= REG_P (recog_data
.operand
[0]);
13497 reg
= recog_data
.operand
[!load
];
13498 mem
= recog_data
.operand
[load
];
13500 gcc_assert (MEM_P (mem
));
13502 mode
= GET_MODE (reg
);
13503 addr
= XEXP (mem
, 0);
13505 /* Strip off const from addresses like (const (plus (...))). */
13506 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13507 addr
= XEXP (addr
, 0);
13509 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
13511 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13518 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13522 arm_address_offset_is_imm (rtx insn
)
13526 extract_insn_cached (insn
);
13528 if (REG_P (recog_data
.operand
[0]))
13531 mem
= recog_data
.operand
[0];
13533 gcc_assert (MEM_P (mem
));
13535 addr
= XEXP (mem
, 0);
13537 if (GET_CODE (addr
) == REG
13538 || (GET_CODE (addr
) == PLUS
13539 && GET_CODE (XEXP (addr
, 0)) == REG
13540 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
))
13546 /* Output an ADD r, s, #n where n may be too big for one instruction.
13547 If adding zero to one register, output nothing. */
13549 output_add_immediate (rtx
*operands
)
13551 HOST_WIDE_INT n
= INTVAL (operands
[2]);
13553 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
13556 output_multi_immediate (operands
,
13557 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13560 output_multi_immediate (operands
,
13561 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13568 /* Output a multiple immediate operation.
13569 OPERANDS is the vector of operands referred to in the output patterns.
13570 INSTR1 is the output pattern to use for the first constant.
13571 INSTR2 is the output pattern to use for subsequent constants.
13572 IMMED_OP is the index of the constant slot in OPERANDS.
13573 N is the constant value. */
13574 static const char *
13575 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
13576 int immed_op
, HOST_WIDE_INT n
)
13578 #if HOST_BITS_PER_WIDE_INT > 32
13584 /* Quick and easy output. */
13585 operands
[immed_op
] = const0_rtx
;
13586 output_asm_insn (instr1
, operands
);
13591 const char * instr
= instr1
;
13593 /* Note that n is never zero here (which would give no output). */
13594 for (i
= 0; i
< 32; i
+= 2)
13598 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
13599 output_asm_insn (instr
, operands
);
13609 /* Return the name of a shifter operation. */
13610 static const char *
13611 arm_shift_nmem(enum rtx_code code
)
13616 return ARM_LSL_NAME
;
13632 /* Return the appropriate ARM instruction for the operation code.
13633 The returned result should not be overwritten. OP is the rtx of the
13634 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13637 arithmetic_instr (rtx op
, int shift_first_arg
)
13639 switch (GET_CODE (op
))
13645 return shift_first_arg
? "rsb" : "sub";
13660 return arm_shift_nmem(GET_CODE(op
));
13663 gcc_unreachable ();
13667 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13668 for the operation code. The returned result should not be overwritten.
13669 OP is the rtx code of the shift.
13670 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13672 static const char *
13673 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
13676 enum rtx_code code
= GET_CODE (op
);
13678 switch (GET_CODE (XEXP (op
, 1)))
13686 *amountp
= INTVAL (XEXP (op
, 1));
13690 gcc_unreachable ();
13696 gcc_assert (*amountp
!= -1);
13697 *amountp
= 32 - *amountp
;
13700 /* Fall through. */
13706 mnem
= arm_shift_nmem(code
);
13710 /* We never have to worry about the amount being other than a
13711 power of 2, since this case can never be reloaded from a reg. */
13712 gcc_assert (*amountp
!= -1);
13713 *amountp
= int_log2 (*amountp
);
13714 return ARM_LSL_NAME
;
13717 gcc_unreachable ();
13720 if (*amountp
!= -1)
13722 /* This is not 100% correct, but follows from the desire to merge
13723 multiplication by a power of 2 with the recognizer for a
13724 shift. >=32 is not a valid shift for "lsl", so we must try and
13725 output a shift that produces the correct arithmetical result.
13726 Using lsr #32 is identical except for the fact that the carry bit
13727 is not set correctly if we set the flags; but we never use the
13728 carry bit from such an operation, so we can ignore that. */
13729 if (code
== ROTATERT
)
13730 /* Rotate is just modulo 32. */
13732 else if (*amountp
!= (*amountp
& 31))
13734 if (code
== ASHIFT
)
13739 /* Shifts of 0 are no-ops. */
13747 /* Obtain the shift from the POWER of two. */
13749 static HOST_WIDE_INT
13750 int_log2 (HOST_WIDE_INT power
)
13752 HOST_WIDE_INT shift
= 0;
13754 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
13756 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

/* Maximum number of characters emitted per .ascii directive before a
   new directive is started (assembler line-length limitation).  */
#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Start a fresh directive when the current one is full.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  /* Backslash and double-quote must themselves be escaped.  */
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable: emit a three-digit octal escape.  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
13811 /* Compute the register save mask for registers 0 through 12
13812 inclusive. This code is used by arm_compute_save_reg_mask. */
13814 static unsigned long
13815 arm_compute_save_reg0_reg12_mask (void)
13817 unsigned long func_type
= arm_current_func_type ();
13818 unsigned long save_reg_mask
= 0;
13821 if (IS_INTERRUPT (func_type
))
13823 unsigned int max_reg
;
13824 /* Interrupt functions must not corrupt any registers,
13825 even call clobbered ones. If this is a leaf function
13826 we can just examine the registers used by the RTL, but
13827 otherwise we have to assume that whatever function is
13828 called might clobber anything, and so we have to save
13829 all the call-clobbered registers as well. */
13830 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
13831 /* FIQ handlers have registers r8 - r12 banked, so
13832 we only need to check r0 - r7, Normal ISRs only
13833 bank r14 and r15, so we must check up to r12.
13834 r13 is the stack pointer which is always preserved,
13835 so we do not need to consider it here. */
13840 for (reg
= 0; reg
<= max_reg
; reg
++)
13841 if (df_regs_ever_live_p (reg
)
13842 || (! current_function_is_leaf
&& call_used_regs
[reg
]))
13843 save_reg_mask
|= (1 << reg
);
13845 /* Also save the pic base register if necessary. */
13847 && !TARGET_SINGLE_PIC_BASE
13848 && arm_pic_register
!= INVALID_REGNUM
13849 && crtl
->uses_pic_offset_table
)
13850 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13852 else if (IS_VOLATILE(func_type
))
13854 /* For noreturn functions we historically omitted register saves
13855 altogether. However this really messes up debugging. As a
13856 compromise save just the frame pointers. Combined with the link
13857 register saved elsewhere this should be sufficient to get
13859 if (frame_pointer_needed
)
13860 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
13861 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
13862 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
13863 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
13864 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
13868 /* In the normal case we only need to save those registers
13869 which are call saved and which are used by this function. */
13870 for (reg
= 0; reg
<= 11; reg
++)
13871 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
13872 save_reg_mask
|= (1 << reg
);
13874 /* Handle the frame pointer as a special case. */
13875 if (frame_pointer_needed
)
13876 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
13878 /* If we aren't loading the PIC register,
13879 don't stack it even though it may be live. */
13881 && !TARGET_SINGLE_PIC_BASE
13882 && arm_pic_register
!= INVALID_REGNUM
13883 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
13884 || crtl
->uses_pic_offset_table
))
13885 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13887 /* The prologue will copy SP into R0, so save it. */
13888 if (IS_STACKALIGN (func_type
))
13889 save_reg_mask
|= 1;
13892 /* Save registers so the exception handler can modify them. */
13893 if (crtl
->calls_eh_return
)
13899 reg
= EH_RETURN_DATA_REGNO (i
);
13900 if (reg
== INVALID_REGNUM
)
13902 save_reg_mask
|= 1 << reg
;
13906 return save_reg_mask
;
13910 /* Compute the number of bytes used to store the static chain register on the
13911 stack, above the stack frame. We need to know this accurately to get the
13912 alignment of the rest of the stack frame correct. */
13914 static int arm_compute_static_chain_stack_bytes (void)
13916 unsigned long func_type
= arm_current_func_type ();
13917 int static_chain_stack_bytes
= 0;
13919 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
&&
13920 IS_NESTED (func_type
) &&
13921 df_regs_ever_live_p (3) && crtl
->args
.pretend_args_size
== 0)
13922 static_chain_stack_bytes
= 4;
13924 return static_chain_stack_bytes
;
13928 /* Compute a bit mask of which registers need to be
13929 saved on the stack for the current function.
13930 This is used by arm_get_frame_offsets, which may add extra registers. */
13932 static unsigned long
13933 arm_compute_save_reg_mask (void)
13935 unsigned int save_reg_mask
= 0;
13936 unsigned long func_type
= arm_current_func_type ();
13939 if (IS_NAKED (func_type
))
13940 /* This should never really happen. */
13943 /* If we are creating a stack frame, then we must save the frame pointer,
13944 IP (which will hold the old stack pointer), LR and the PC. */
13945 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
13947 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
13950 | (1 << PC_REGNUM
);
13952 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
13954 /* Decide if we need to save the link register.
13955 Interrupt routines have their own banked link register,
13956 so they never need to save it.
13957 Otherwise if we do not use the link register we do not need to save
13958 it. If we are pushing other registers onto the stack however, we
13959 can save an instruction in the epilogue by pushing the link register
13960 now and then popping it back into the PC. This incurs extra memory
13961 accesses though, so we only do it when optimizing for size, and only
13962 if we know that we will not need a fancy return sequence. */
13963 if (df_regs_ever_live_p (LR_REGNUM
)
13966 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
13967 && !crtl
->calls_eh_return
))
13968 save_reg_mask
|= 1 << LR_REGNUM
;
13970 if (cfun
->machine
->lr_save_eliminated
)
13971 save_reg_mask
&= ~ (1 << LR_REGNUM
);
13973 if (TARGET_REALLY_IWMMXT
13974 && ((bit_count (save_reg_mask
)
13975 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
13976 arm_compute_static_chain_stack_bytes())
13979 /* The total number of registers that are going to be pushed
13980 onto the stack is odd. We need to ensure that the stack
13981 is 64-bit aligned before we start to save iWMMXt registers,
13982 and also before we start to create locals. (A local variable
13983 might be a double or long long which we will load/store using
13984 an iWMMXt instruction). Therefore we need to push another
13985 ARM register, so that the stack will be 64-bit aligned. We
13986 try to avoid using the arg registers (r0 -r3) as they might be
13987 used to pass values in a tail call. */
13988 for (reg
= 4; reg
<= 12; reg
++)
13989 if ((save_reg_mask
& (1 << reg
)) == 0)
13993 save_reg_mask
|= (1 << reg
);
13996 cfun
->machine
->sibcall_blocked
= 1;
13997 save_reg_mask
|= (1 << 3);
14001 /* We may need to push an additional register for use initializing the
14002 PIC base register. */
14003 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
14004 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
14006 reg
= thumb_find_work_register (1 << 4);
14007 if (!call_used_regs
[reg
])
14008 save_reg_mask
|= (1 << reg
);
14011 return save_reg_mask
;
14015 /* Compute a bit mask of which registers need to be
14016 saved on the stack for the current function. */
14017 static unsigned long
14018 thumb1_compute_save_reg_mask (void)
14020 unsigned long mask
;
14024 for (reg
= 0; reg
< 12; reg
++)
14025 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14029 && !TARGET_SINGLE_PIC_BASE
14030 && arm_pic_register
!= INVALID_REGNUM
14031 && crtl
->uses_pic_offset_table
)
14032 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
14034 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14035 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
14036 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
14038 /* LR will also be pushed if any lo regs are pushed. */
14039 if (mask
& 0xff || thumb_force_lr_save ())
14040 mask
|= (1 << LR_REGNUM
);
14042 /* Make sure we have a low work register if we need one.
14043 We will need one if we are going to push a high register,
14044 but we are not currently intending to push a low register. */
14045 if ((mask
& 0xff) == 0
14046 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
14048 /* Use thumb_find_work_register to choose which register
14049 we will use. If the register is live then we will
14050 have to push it. Use LAST_LO_REGNUM as our fallback
14051 choice for the register to select. */
14052 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
14053 /* Make sure the register returned by thumb_find_work_register is
14054 not part of the return value. */
14055 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
14056 reg
= LAST_LO_REGNUM
;
14058 if (! call_used_regs
[reg
])
14062 /* The 504 below is 8 bytes less than 512 because there are two possible
14063 alignment words. We can't tell here if they will be present or not so we
14064 have to play it safe and assume that they are. */
14065 if ((CALLER_INTERWORKING_SLOT_SIZE
+
14066 ROUND_UP_WORD (get_frame_size ()) +
14067 crtl
->outgoing_args_size
) >= 504)
14069 /* This is the same as the code in thumb1_expand_prologue() which
14070 determines which register to use for stack decrement. */
14071 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
14072 if (mask
& (1 << reg
))
14075 if (reg
> LAST_LO_REGNUM
)
14077 /* Make sure we have a register available for stack decrement. */
14078 mask
|= 1 << LAST_LO_REGNUM
;
14086 /* Return the number of bytes required to save VFP registers. */
14088 arm_get_vfp_saved_size (void)
14090 unsigned int regno
;
14095 /* Space for saved VFP registers. */
14096 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14099 for (regno
= FIRST_VFP_REGNUM
;
14100 regno
< LAST_VFP_REGNUM
;
14103 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
14104 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
14108 /* Workaround ARM10 VFPr1 bug. */
14109 if (count
== 2 && !arm_arch6
)
14111 saved
+= count
* 8;
14120 if (count
== 2 && !arm_arch6
)
14122 saved
+= count
* 8;
14129 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14130 everything bar the final return instruction. */
14132 output_return_instruction (rtx operand
, int really_return
, int reverse
)
14134 char conditional
[10];
14137 unsigned long live_regs_mask
;
14138 unsigned long func_type
;
14139 arm_stack_offsets
*offsets
;
14141 func_type
= arm_current_func_type ();
14143 if (IS_NAKED (func_type
))
14146 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14148 /* If this function was declared non-returning, and we have
14149 found a tail call, then we have to trust that the called
14150 function won't return. */
14155 /* Otherwise, trap an attempted return by aborting. */
14157 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
14159 assemble_external_libcall (ops
[1]);
14160 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
14166 gcc_assert (!cfun
->calls_alloca
|| really_return
);
14168 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
14170 cfun
->machine
->return_used_this_function
= 1;
14172 offsets
= arm_get_frame_offsets ();
14173 live_regs_mask
= offsets
->saved_regs_mask
;
14175 if (live_regs_mask
)
14177 const char * return_reg
;
14179 /* If we do not have any special requirements for function exit
14180 (e.g. interworking) then we can load the return address
14181 directly into the PC. Otherwise we must load it into LR. */
14183 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
14184 return_reg
= reg_names
[PC_REGNUM
];
14186 return_reg
= reg_names
[LR_REGNUM
];
14188 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
14190 /* There are three possible reasons for the IP register
14191 being saved. 1) a stack frame was created, in which case
14192 IP contains the old stack pointer, or 2) an ISR routine
14193 corrupted it, or 3) it was saved to align the stack on
14194 iWMMXt. In case 1, restore IP into SP, otherwise just
14196 if (frame_pointer_needed
)
14198 live_regs_mask
&= ~ (1 << IP_REGNUM
);
14199 live_regs_mask
|= (1 << SP_REGNUM
);
14202 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
14205 /* On some ARM architectures it is faster to use LDR rather than
14206 LDM to load a single register. On other architectures, the
14207 cost is the same. In 26 bit mode, or for exception handlers,
14208 we have to use LDM to load the PC so that the CPSR is also
14210 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14211 if (live_regs_mask
== (1U << reg
))
14214 if (reg
<= LAST_ARM_REGNUM
14215 && (reg
!= LR_REGNUM
14217 || ! IS_INTERRUPT (func_type
)))
14219 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
14220 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
14227 /* Generate the load multiple instruction to restore the
14228 registers. Note we can get here, even if
14229 frame_pointer_needed is true, but only if sp already
14230 points to the base of the saved core registers. */
14231 if (live_regs_mask
& (1 << SP_REGNUM
))
14233 unsigned HOST_WIDE_INT stack_adjust
;
14235 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
14236 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
14238 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
14239 if (TARGET_UNIFIED_ASM
)
14240 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
14242 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
14245 /* If we can't use ldmib (SA110 bug),
14246 then try to pop r3 instead. */
14248 live_regs_mask
|= 1 << 3;
14250 if (TARGET_UNIFIED_ASM
)
14251 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
14253 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
14257 if (TARGET_UNIFIED_ASM
)
14258 sprintf (instr
, "pop%s\t{", conditional
);
14260 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
14262 p
= instr
+ strlen (instr
);
14264 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
14265 if (live_regs_mask
& (1 << reg
))
14267 int l
= strlen (reg_names
[reg
]);
14273 memcpy (p
, ", ", 2);
14277 memcpy (p
, "%|", 2);
14278 memcpy (p
+ 2, reg_names
[reg
], l
);
14282 if (live_regs_mask
& (1 << LR_REGNUM
))
14284 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
14285 /* If returning from an interrupt, restore the CPSR. */
14286 if (IS_INTERRUPT (func_type
))
14293 output_asm_insn (instr
, & operand
);
14295 /* See if we need to generate an extra instruction to
14296 perform the actual function return. */
14298 && func_type
!= ARM_FT_INTERWORKED
14299 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
14301 /* The return has already been handled
14302 by loading the LR into the PC. */
14309 switch ((int) ARM_FUNC_TYPE (func_type
))
14313 /* ??? This is wrong for unified assembly syntax. */
14314 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
14317 case ARM_FT_INTERWORKED
:
14318 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14321 case ARM_FT_EXCEPTION
:
14322 /* ??? This is wrong for unified assembly syntax. */
14323 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
14327 /* Use bx if it's available. */
14328 if (arm_arch5
|| arm_arch4t
)
14329 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14331 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
14335 output_asm_insn (instr
, & operand
);
14341 /* Write the function name into the code section, directly preceding
14342 the function prologue.
14344 Code will be output similar to this:
14346 .ascii "arm_poke_function_name", 0
14349 .word 0xff000000 + (t1 - t0)
14350 arm_poke_function_name
14352 stmfd sp!, {fp, ip, lr, pc}
14355 When performing a stack backtrace, code can inspect the value
14356 of 'pc' stored at 'fp' + 0. If the trace function then looks
14357 at location pc - 12 and the top 8 bits are set, then we know
14358 that there is a function name embedded immediately preceding this
14359 location and has length ((pc[-3]) & 0xff000000).
14361 We assume that pc is declared as a pointer to an unsigned long.
14363 It is of no benefit to output the function name if we are assembling
14364 a leaf function. These function types will not contain a stack
14365 backtrace structure, therefore it is not possible to determine the
14368 arm_poke_function_name (FILE *stream
, const char *name
)
14370 unsigned long alignlength
;
14371 unsigned long length
;
14374 length
= strlen (name
) + 1;
14375 alignlength
= ROUND_UP_WORD (length
);
14377 ASM_OUTPUT_ASCII (stream
, name
, length
);
14378 ASM_OUTPUT_ALIGN (stream
, 2);
14379 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
14380 assemble_aligned_integer (UNITS_PER_WORD
, x
);
14383 /* Place some comments into the assembler stream
14384 describing the current function. */
14386 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
14388 unsigned long func_type
;
14392 thumb1_output_function_prologue (f
, frame_size
);
14396 /* Sanity check. */
14397 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
14399 func_type
= arm_current_func_type ();
14401 switch ((int) ARM_FUNC_TYPE (func_type
))
14404 case ARM_FT_NORMAL
:
14406 case ARM_FT_INTERWORKED
:
14407 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
14410 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
14413 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
14415 case ARM_FT_EXCEPTION
:
14416 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
14420 if (IS_NAKED (func_type
))
14421 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14423 if (IS_VOLATILE (func_type
))
14424 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
14426 if (IS_NESTED (func_type
))
14427 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
14428 if (IS_STACKALIGN (func_type
))
14429 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14431 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14433 crtl
->args
.pretend_args_size
, frame_size
);
14435 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14436 frame_pointer_needed
,
14437 cfun
->machine
->uses_anonymous_args
);
14439 if (cfun
->machine
->lr_save_eliminated
)
14440 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
14442 if (crtl
->calls_eh_return
)
14443 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
14448 arm_output_epilogue (rtx sibling
)
14451 unsigned long saved_regs_mask
;
14452 unsigned long func_type
;
14453 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14454 frame that is $fp + 4 for a non-variadic function. */
14455 int floats_offset
= 0;
14457 FILE * f
= asm_out_file
;
14458 unsigned int lrm_count
= 0;
14459 int really_return
= (sibling
== NULL
);
14461 arm_stack_offsets
*offsets
;
14463 /* If we have already generated the return instruction
14464 then it is futile to generate anything else. */
14465 if (use_return_insn (FALSE
, sibling
) &&
14466 (cfun
->machine
->return_used_this_function
!= 0))
14469 func_type
= arm_current_func_type ();
14471 if (IS_NAKED (func_type
))
14472 /* Naked functions don't have epilogues. */
14475 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14479 /* A volatile function should never return. Call abort. */
14480 op
= gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)" : "abort");
14481 assemble_external_libcall (op
);
14482 output_asm_insn ("bl\t%a0", &op
);
14487 /* If we are throwing an exception, then we really must be doing a
14488 return, so we can't tail-call. */
14489 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
14491 offsets
= arm_get_frame_offsets ();
14492 saved_regs_mask
= offsets
->saved_regs_mask
;
14495 lrm_count
= bit_count (saved_regs_mask
);
14497 floats_offset
= offsets
->saved_args
;
14498 /* Compute how far away the floats will be. */
14499 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14500 if (saved_regs_mask
& (1 << reg
))
14501 floats_offset
+= 4;
14503 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
14505 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14506 int vfp_offset
= offsets
->frame
;
14508 if (TARGET_FPA_EMU2
)
14510 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14511 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14513 floats_offset
+= 12;
14514 asm_fprintf (f
, "\tldfe\t%r, [%r, #-%d]\n",
14515 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14520 start_reg
= LAST_FPA_REGNUM
;
14522 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14524 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14526 floats_offset
+= 12;
14528 /* We can't unstack more than four registers at once. */
14529 if (start_reg
- reg
== 3)
14531 asm_fprintf (f
, "\tlfm\t%r, 4, [%r, #-%d]\n",
14532 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14533 start_reg
= reg
- 1;
14538 if (reg
!= start_reg
)
14539 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14540 reg
+ 1, start_reg
- reg
,
14541 FP_REGNUM
, floats_offset
- vfp_offset
);
14542 start_reg
= reg
- 1;
14546 /* Just in case the last register checked also needs unstacking. */
14547 if (reg
!= start_reg
)
14548 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14549 reg
+ 1, start_reg
- reg
,
14550 FP_REGNUM
, floats_offset
- vfp_offset
);
14553 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14557 /* The fldmd insns do not have base+offset addressing
14558 modes, so we use IP to hold the address. */
14559 saved_size
= arm_get_vfp_saved_size ();
14561 if (saved_size
> 0)
14563 floats_offset
+= saved_size
;
14564 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", IP_REGNUM
,
14565 FP_REGNUM
, floats_offset
- vfp_offset
);
14567 start_reg
= FIRST_VFP_REGNUM
;
14568 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
14570 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14571 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
14573 if (start_reg
!= reg
)
14574 vfp_output_fldmd (f
, IP_REGNUM
,
14575 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14576 (reg
- start_reg
) / 2);
14577 start_reg
= reg
+ 2;
14580 if (start_reg
!= reg
)
14581 vfp_output_fldmd (f
, IP_REGNUM
,
14582 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14583 (reg
- start_reg
) / 2);
14588 /* The frame pointer is guaranteed to be non-double-word aligned.
14589 This is because it is set to (old_stack_pointer - 4) and the
14590 old_stack_pointer was double word aligned. Thus the offset to
14591 the iWMMXt registers to be loaded must also be non-double-word
14592 sized, so that the resultant address *is* double-word aligned.
14593 We can ignore floats_offset since that was already included in
14594 the live_regs_mask. */
14595 lrm_count
+= (lrm_count
% 2 ? 2 : 1);
14597 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
14598 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14600 asm_fprintf (f
, "\twldrd\t%r, [%r, #-%d]\n",
14601 reg
, FP_REGNUM
, lrm_count
* 4);
14606 /* saved_regs_mask should contain the IP, which at the time of stack
14607 frame generation actually contains the old stack pointer. So a
14608 quick way to unwind the stack is just pop the IP register directly
14609 into the stack pointer. */
14610 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
14611 saved_regs_mask
&= ~ (1 << IP_REGNUM
);
14612 saved_regs_mask
|= (1 << SP_REGNUM
);
14614 /* There are two registers left in saved_regs_mask - LR and PC. We
14615 only need to restore the LR register (the return address), but to
14616 save time we can load it directly into the PC, unless we need a
14617 special function exit sequence, or we are not really returning. */
14619 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
14620 && !crtl
->calls_eh_return
)
14621 /* Delete the LR from the register mask, so that the LR on
14622 the stack is loaded into the PC in the register mask. */
14623 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
14625 saved_regs_mask
&= ~ (1 << PC_REGNUM
);
14627 /* We must use SP as the base register, because SP is one of the
14628 registers being restored. If an interrupt or page fault
14629 happens in the ldm instruction, the SP might or might not
14630 have been restored. That would be bad, as then SP will no
14631 longer indicate the safe area of stack, and we can get stack
14632 corruption. Using SP as the base register means that it will
14633 be reset correctly to the original value, should an interrupt
14634 occur. If the stack pointer already points at the right
14635 place, then omit the subtraction. */
14636 if (offsets
->outgoing_args
!= (1 + (int) bit_count (saved_regs_mask
))
14637 || cfun
->calls_alloca
)
14638 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", SP_REGNUM
, FP_REGNUM
,
14639 4 * bit_count (saved_regs_mask
));
14640 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
, 0);
14642 if (IS_INTERRUPT (func_type
))
14643 /* Interrupt handlers will have pushed the
14644 IP onto the stack, so restore it now. */
14645 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, 1 << IP_REGNUM
, 0);
14649 /* This branch is executed for ARM mode (non-apcs frames) and
14650 Thumb-2 mode. Frame layout is essentially the same for those
14651 cases, except that in ARM mode frame pointer points to the
14652 first saved register, while in Thumb-2 mode the frame pointer points
14653 to the last saved register.
14655 It is possible to make frame pointer point to last saved
14656 register in both cases, and remove some conditionals below.
14657 That means that fp setup in prologue would be just "mov fp, sp"
14658 and sp restore in epilogue would be just "mov sp, fp", whereas
14659 now we have to use add/sub in those cases. However, the value
14660 of that would be marginal, as both mov and add/sub are 32-bit
14661 in ARM mode, and it would require extra conditionals
14662 in arm_expand_prologue to distingish ARM-apcs-frame case
14663 (where frame pointer is required to point at first register)
14664 and ARM-non-apcs-frame. Therefore, such change is postponed
14665 until real need arise. */
14666 unsigned HOST_WIDE_INT amount
;
14668 /* Restore stack pointer if necessary. */
14669 if (TARGET_ARM
&& frame_pointer_needed
)
14671 operands
[0] = stack_pointer_rtx
;
14672 operands
[1] = hard_frame_pointer_rtx
;
14674 operands
[2] = GEN_INT (offsets
->frame
- offsets
->saved_regs
);
14675 output_add_immediate (operands
);
14679 if (frame_pointer_needed
)
14681 /* For Thumb-2 restore sp from the frame pointer.
14682 Operand restrictions mean we have to incrememnt FP, then copy
14684 amount
= offsets
->locals_base
- offsets
->saved_regs
;
14685 operands
[0] = hard_frame_pointer_rtx
;
14689 unsigned long count
;
14690 operands
[0] = stack_pointer_rtx
;
14691 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
14692 /* pop call clobbered registers if it avoids a
14693 separate stack adjustment. */
14694 count
= offsets
->saved_regs
- offsets
->saved_args
;
14697 && !crtl
->calls_eh_return
14698 && bit_count(saved_regs_mask
) * 4 == count
14699 && !IS_INTERRUPT (func_type
)
14700 && !crtl
->tail_call_emit
)
14702 unsigned long mask
;
14703 /* Preserve return values, of any size. */
14704 mask
= (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14706 mask
&= ~saved_regs_mask
;
14708 while (bit_count (mask
) * 4 > amount
)
14710 while ((mask
& (1 << reg
)) == 0)
14712 mask
&= ~(1 << reg
);
14714 if (bit_count (mask
) * 4 == amount
) {
14716 saved_regs_mask
|= mask
;
14723 operands
[1] = operands
[0];
14724 operands
[2] = GEN_INT (amount
);
14725 output_add_immediate (operands
);
14727 if (frame_pointer_needed
)
14728 asm_fprintf (f
, "\tmov\t%r, %r\n",
14729 SP_REGNUM
, HARD_FRAME_POINTER_REGNUM
);
14732 if (TARGET_FPA_EMU2
)
14734 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
14735 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14736 asm_fprintf (f
, "\tldfe\t%r, [%r], #12\n",
14741 start_reg
= FIRST_FPA_REGNUM
;
14743 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
14745 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14747 if (reg
- start_reg
== 3)
14749 asm_fprintf (f
, "\tlfmfd\t%r, 4, [%r]!\n",
14750 start_reg
, SP_REGNUM
);
14751 start_reg
= reg
+ 1;
14756 if (reg
!= start_reg
)
14757 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
14758 start_reg
, reg
- start_reg
,
14761 start_reg
= reg
+ 1;
14765 /* Just in case the last register checked also needs unstacking. */
14766 if (reg
!= start_reg
)
14767 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
14768 start_reg
, reg
- start_reg
, SP_REGNUM
);
14771 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14773 int end_reg
= LAST_VFP_REGNUM
+ 1;
14775 /* Scan the registers in reverse order. We need to match
14776 any groupings made in the prologue and generate matching
14778 for (reg
= LAST_VFP_REGNUM
- 1; reg
>= FIRST_VFP_REGNUM
; reg
-= 2)
14780 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14781 && (!df_regs_ever_live_p (reg
+ 1)
14782 || call_used_regs
[reg
+ 1]))
14784 if (end_reg
> reg
+ 2)
14785 vfp_output_fldmd (f
, SP_REGNUM
,
14786 (reg
+ 2 - FIRST_VFP_REGNUM
) / 2,
14787 (end_reg
- (reg
+ 2)) / 2);
14791 if (end_reg
> reg
+ 2)
14792 vfp_output_fldmd (f
, SP_REGNUM
, 0,
14793 (end_reg
- (reg
+ 2)) / 2);
14797 for (reg
= FIRST_IWMMXT_REGNUM
; reg
<= LAST_IWMMXT_REGNUM
; reg
++)
14798 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14799 asm_fprintf (f
, "\twldrd\t%r, [%r], #8\n", reg
, SP_REGNUM
);
14801 /* If we can, restore the LR into the PC. */
14802 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
14803 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
14804 && !IS_STACKALIGN (func_type
)
14806 && crtl
->args
.pretend_args_size
== 0
14807 && saved_regs_mask
& (1 << LR_REGNUM
)
14808 && !crtl
->calls_eh_return
)
14810 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
14811 saved_regs_mask
|= (1 << PC_REGNUM
);
14812 rfe
= IS_INTERRUPT (func_type
);
14817 /* Load the registers off the stack. If we only have one register
14818 to load use the LDR instruction - it is faster. For Thumb-2
14819 always use pop and the assembler will pick the best instruction.*/
14820 if (TARGET_ARM
&& saved_regs_mask
== (1 << LR_REGNUM
)
14821 && !IS_INTERRUPT(func_type
))
14823 asm_fprintf (f
, "\tldr\t%r, [%r], #4\n", LR_REGNUM
, SP_REGNUM
);
14825 else if (saved_regs_mask
)
14827 if (saved_regs_mask
& (1 << SP_REGNUM
))
14828 /* Note - write back to the stack register is not enabled
14829 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14830 in the list of registers and if we add writeback the
14831 instruction becomes UNPREDICTABLE. */
14832 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
,
14834 else if (TARGET_ARM
)
14835 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, saved_regs_mask
,
14838 print_multi_reg (f
, "pop\t", SP_REGNUM
, saved_regs_mask
, 0);
14841 if (crtl
->args
.pretend_args_size
)
14843 /* Unwind the pre-pushed regs. */
14844 operands
[0] = operands
[1] = stack_pointer_rtx
;
14845 operands
[2] = GEN_INT (crtl
->args
.pretend_args_size
);
14846 output_add_immediate (operands
);
14850 /* We may have already restored PC directly from the stack. */
14851 if (!really_return
|| saved_regs_mask
& (1 << PC_REGNUM
))
14854 /* Stack adjustment for exception handler. */
14855 if (crtl
->calls_eh_return
)
14856 asm_fprintf (f
, "\tadd\t%r, %r, %r\n", SP_REGNUM
, SP_REGNUM
,
14857 ARM_EH_STACKADJ_REGNUM
);
14859 /* Generate the return instruction. */
14860 switch ((int) ARM_FUNC_TYPE (func_type
))
14864 asm_fprintf (f
, "\tsubs\t%r, %r, #4\n", PC_REGNUM
, LR_REGNUM
);
14867 case ARM_FT_EXCEPTION
:
14868 asm_fprintf (f
, "\tmovs\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
14871 case ARM_FT_INTERWORKED
:
14872 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
14876 if (IS_STACKALIGN (func_type
))
14878 /* See comment in arm_expand_prologue. */
14879 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, 0);
14881 if (arm_arch5
|| arm_arch4t
)
14882 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
14884 asm_fprintf (f
, "\tmov\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
14892 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
14893 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
14895 arm_stack_offsets
*offsets
;
14901 /* Emit any call-via-reg trampolines that are needed for v4t support
14902 of call_reg and call_value_reg type insns. */
14903 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
14905 rtx label
= cfun
->machine
->call_via
[regno
];
14909 switch_to_section (function_section (current_function_decl
));
14910 targetm
.asm_out
.internal_label (asm_out_file
, "L",
14911 CODE_LABEL_NUMBER (label
));
14912 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
14916 /* ??? Probably not safe to set this here, since it assumes that a
14917 function will be emitted as assembly immediately after we generate
14918 RTL for it. This does not happen for inline functions. */
14919 cfun
->machine
->return_used_this_function
= 0;
14921 else /* TARGET_32BIT */
14923 /* We need to take into account any stack-frame rounding. */
14924 offsets
= arm_get_frame_offsets ();
14926 gcc_assert (!use_return_insn (FALSE
, NULL
)
14927 || (cfun
->machine
->return_used_this_function
!= 0)
14928 || offsets
->saved_regs
== offsets
->outgoing_args
14929 || frame_pointer_needed
);
14931 /* Reset the ARM-specific per-function variables. */
14932 after_arm_reorg
= 0;
14936 /* Generate and emit an insn that we will recognize as a push_multi.
14937 Unfortunately, since this insn does not reflect very well the actual
14938 semantics of the operation, we need to annotate the insn for the benefit
14939 of DWARF2 frame unwind information. */
14941 emit_multi_reg_push (unsigned long mask
)
14944 int num_dwarf_regs
;
14948 int dwarf_par_index
;
14951 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
14952 if (mask
& (1 << i
))
14955 gcc_assert (num_regs
&& num_regs
<= 16);
14957 /* We don't record the PC in the dwarf frame information. */
14958 num_dwarf_regs
= num_regs
;
14959 if (mask
& (1 << PC_REGNUM
))
14962 /* For the body of the insn we are going to generate an UNSPEC in
14963 parallel with several USEs. This allows the insn to be recognized
14964 by the push_multi pattern in the arm.md file.
14966 The body of the insn looks something like this:
14969 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14970 (const_int:SI <num>)))
14971 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14977 For the frame note however, we try to be more explicit and actually
14978 show each register being stored into the stack frame, plus a (single)
14979 decrement of the stack pointer. We do it this way in order to be
14980 friendly to the stack unwinding code, which only wants to see a single
14981 stack decrement per instruction. The RTL we generate for the note looks
14982 something like this:
14985 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14986 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14987 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14988 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14992 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14993 instead we'd have a parallel expression detailing all
14994 the stores to the various memory addresses so that debug
14995 information is more up-to-date. Remember however while writing
14996 this to take care of the constraints with the push instruction.
14998 Note also that this has to be taken care of for the VFP registers.
15000 For more see PR43399. */
15002 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
15003 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
15004 dwarf_par_index
= 1;
15006 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15008 if (mask
& (1 << i
))
15010 reg
= gen_rtx_REG (SImode
, i
);
15012 XVECEXP (par
, 0, 0)
15013 = gen_rtx_SET (VOIDmode
,
15016 gen_rtx_PRE_MODIFY (Pmode
,
15019 (stack_pointer_rtx
,
15022 gen_rtx_UNSPEC (BLKmode
,
15023 gen_rtvec (1, reg
),
15024 UNSPEC_PUSH_MULT
));
15026 if (i
!= PC_REGNUM
)
15028 tmp
= gen_rtx_SET (VOIDmode
,
15029 gen_frame_mem (SImode
, stack_pointer_rtx
),
15031 RTX_FRAME_RELATED_P (tmp
) = 1;
15032 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
15040 for (j
= 1, i
++; j
< num_regs
; i
++)
15042 if (mask
& (1 << i
))
15044 reg
= gen_rtx_REG (SImode
, i
);
15046 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
15048 if (i
!= PC_REGNUM
)
15051 = gen_rtx_SET (VOIDmode
,
15054 plus_constant (stack_pointer_rtx
,
15057 RTX_FRAME_RELATED_P (tmp
) = 1;
15058 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
15065 par
= emit_insn (par
);
15067 tmp
= gen_rtx_SET (VOIDmode
,
15069 plus_constant (stack_pointer_rtx
, -4 * num_regs
));
15070 RTX_FRAME_RELATED_P (tmp
) = 1;
15071 XVECEXP (dwarf
, 0, 0) = tmp
;
15073 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15078 /* Calculate the size of the return value that is passed in registers. */
15080 arm_size_return_regs (void)
15082 enum machine_mode mode
;
15084 if (crtl
->return_rtx
!= 0)
15085 mode
= GET_MODE (crtl
->return_rtx
);
15087 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
15089 return GET_MODE_SIZE (mode
);
15093 emit_sfm (int base_reg
, int count
)
15100 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
15101 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
15103 reg
= gen_rtx_REG (XFmode
, base_reg
++);
15105 XVECEXP (par
, 0, 0)
15106 = gen_rtx_SET (VOIDmode
,
15109 gen_rtx_PRE_MODIFY (Pmode
,
15112 (stack_pointer_rtx
,
15115 gen_rtx_UNSPEC (BLKmode
,
15116 gen_rtvec (1, reg
),
15117 UNSPEC_PUSH_MULT
));
15118 tmp
= gen_rtx_SET (VOIDmode
,
15119 gen_frame_mem (XFmode
, stack_pointer_rtx
), reg
);
15120 RTX_FRAME_RELATED_P (tmp
) = 1;
15121 XVECEXP (dwarf
, 0, 1) = tmp
;
15123 for (i
= 1; i
< count
; i
++)
15125 reg
= gen_rtx_REG (XFmode
, base_reg
++);
15126 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
15128 tmp
= gen_rtx_SET (VOIDmode
,
15129 gen_frame_mem (XFmode
,
15130 plus_constant (stack_pointer_rtx
,
15133 RTX_FRAME_RELATED_P (tmp
) = 1;
15134 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
15137 tmp
= gen_rtx_SET (VOIDmode
,
15139 plus_constant (stack_pointer_rtx
, -12 * count
));
15141 RTX_FRAME_RELATED_P (tmp
) = 1;
15142 XVECEXP (dwarf
, 0, 0) = tmp
;
15144 par
= emit_insn (par
);
15145 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15151 /* Return true if the current function needs to save/restore LR. */
15154 thumb_force_lr_save (void)
15156 return !cfun
->machine
->lr_save_eliminated
15157 && (!leaf_function_p ()
15158 || thumb_far_jump_used_p ()
15159 || df_regs_ever_live_p (LR_REGNUM
));
15163 /* Return true if r3 is used by any of the tail call insns in the
15164 current function. */
15167 any_sibcall_uses_r3 (void)
15172 if (!crtl
->tail_call_emit
)
15174 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
15175 if (e
->flags
& EDGE_SIBCALL
)
15177 rtx call
= BB_END (e
->src
);
15178 if (!CALL_P (call
))
15179 call
= prev_nonnote_nondebug_insn (call
);
15180 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
15181 if (find_regno_fusage (call
, USE
, 3))
15188 /* Compute the distance from register FROM to register TO.
15189 These can be the arg pointer (26), the soft frame pointer (25),
15190 the stack pointer (13) or the hard frame pointer (11).
15191 In thumb mode r7 is used as the soft frame pointer, if needed.
15192 Typical stack layout looks like this:
15194 old stack pointer -> | |
15197 | | saved arguments for
15198 | | vararg functions
15201 hard FP & arg pointer -> | | \
15209 soft frame pointer -> | | /
15214 locals base pointer -> | | /
15219 current stack pointer -> | | /
15222 For a given function some or all of these stack components
15223 may not be needed, giving rise to the possibility of
15224 eliminating some of the registers.
15226 The values returned by this function must reflect the behavior
15227 of arm_expand_prologue() and arm_compute_save_reg_mask().
15229 The sign of the number returned reflects the direction of stack
15230 growth, so the values are positive for all eliminations except
15231 from the soft frame pointer to the hard frame pointer.
15233 SFP may point just inside the local variables block to ensure correct
15237 /* Calculate stack offsets. These are used to calculate register elimination
15238 offsets and in prologue/epilogue code. Also calculates which registers
15239 should be saved. */
15241 static arm_stack_offsets
*
15242 arm_get_frame_offsets (void)
15244 struct arm_stack_offsets
*offsets
;
15245 unsigned long func_type
;
15249 HOST_WIDE_INT frame_size
;
15252 offsets
= &cfun
->machine
->stack_offsets
;
15254 /* We need to know if we are a leaf function. Unfortunately, it
15255 is possible to be called after start_sequence has been called,
15256 which causes get_insns to return the insns for the sequence,
15257 not the function, which will cause leaf_function_p to return
15258 the incorrect result.
15260 to know about leaf functions once reload has completed, and the
15261 frame size cannot be changed after that time, so we can safely
15262 use the cached value. */
15264 if (reload_completed
)
15267 /* Initially this is the size of the local variables. It will translated
15268 into an offset once we have determined the size of preceding data. */
15269 frame_size
= ROUND_UP_WORD (get_frame_size ());
15271 leaf
= leaf_function_p ();
15273 /* Space for variadic functions. */
15274 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
15276 /* In Thumb mode this is incorrect, but never used. */
15277 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
15278 arm_compute_static_chain_stack_bytes();
15282 unsigned int regno
;
15284 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
15285 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15286 saved
= core_saved
;
15288 /* We know that SP will be doubleword aligned on entry, and we must
15289 preserve that condition at any subroutine call. We also require the
15290 soft frame pointer to be doubleword aligned. */
15292 if (TARGET_REALLY_IWMMXT
)
15294 /* Check for the call-saved iWMMXt registers. */
15295 for (regno
= FIRST_IWMMXT_REGNUM
;
15296 regno
<= LAST_IWMMXT_REGNUM
;
15298 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15302 func_type
= arm_current_func_type ();
15303 if (! IS_VOLATILE (func_type
))
15305 /* Space for saved FPA registers. */
15306 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
15307 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15310 /* Space for saved VFP registers. */
15311 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15312 saved
+= arm_get_vfp_saved_size ();
15315 else /* TARGET_THUMB1 */
15317 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
15318 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15319 saved
= core_saved
;
15320 if (TARGET_BACKTRACE
)
15324 /* Saved registers include the stack frame. */
15325 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
15326 arm_compute_static_chain_stack_bytes();
15327 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
15328 /* A leaf function does not need any stack alignment if it has nothing
15330 if (leaf
&& frame_size
== 0
15331 /* However if it calls alloca(), we have a dynamically allocated
15332 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15333 && ! cfun
->calls_alloca
)
15335 offsets
->outgoing_args
= offsets
->soft_frame
;
15336 offsets
->locals_base
= offsets
->soft_frame
;
15340 /* Ensure SFP has the correct alignment. */
15341 if (ARM_DOUBLEWORD_ALIGN
15342 && (offsets
->soft_frame
& 7))
15344 offsets
->soft_frame
+= 4;
15345 /* Try to align stack by pushing an extra reg. Don't bother doing this
15346 when there is a stack frame as the alignment will be rolled into
15347 the normal stack adjustment. */
15348 if (frame_size
+ crtl
->outgoing_args_size
== 0)
15352 /* If it is safe to use r3, then do so. This sometimes
15353 generates better code on Thumb-2 by avoiding the need to
15354 use 32-bit push/pop instructions. */
15355 if (! any_sibcall_uses_r3 ()
15356 && arm_size_return_regs () <= 12
15357 && (offsets
->saved_regs_mask
& (1 << 3)) == 0)
15362 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
15364 if ((offsets
->saved_regs_mask
& (1 << i
)) == 0)
15373 offsets
->saved_regs
+= 4;
15374 offsets
->saved_regs_mask
|= (1 << reg
);
15379 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
15380 offsets
->outgoing_args
= (offsets
->locals_base
15381 + crtl
->outgoing_args_size
);
15383 if (ARM_DOUBLEWORD_ALIGN
)
15385 /* Ensure SP remains doubleword aligned. */
15386 if (offsets
->outgoing_args
& 7)
15387 offsets
->outgoing_args
+= 4;
15388 gcc_assert (!(offsets
->outgoing_args
& 7));
15395 /* Calculate the relative offsets for the different stack pointers. Positive
15396 offsets are in the direction of stack growth. */
15399 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
15401 arm_stack_offsets
*offsets
;
15403 offsets
= arm_get_frame_offsets ();
15405 /* OK, now we have enough information to compute the distances.
15406 There must be an entry in these switch tables for each pair
15407 of registers in ELIMINABLE_REGS, even if some of the entries
15408 seem to be redundant or useless. */
15411 case ARG_POINTER_REGNUM
:
15414 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15417 case FRAME_POINTER_REGNUM
:
15418 /* This is the reverse of the soft frame pointer
15419 to hard frame pointer elimination below. */
15420 return offsets
->soft_frame
- offsets
->saved_args
;
15422 case ARM_HARD_FRAME_POINTER_REGNUM
:
15423 /* This is only non-zero in the case where the static chain register
15424 is stored above the frame. */
15425 return offsets
->frame
- offsets
->saved_args
- 4;
15427 case STACK_POINTER_REGNUM
:
15428 /* If nothing has been pushed on the stack at all
15429 then this will return -4. This *is* correct! */
15430 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
15433 gcc_unreachable ();
15435 gcc_unreachable ();
15437 case FRAME_POINTER_REGNUM
:
15440 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15443 case ARM_HARD_FRAME_POINTER_REGNUM
:
15444 /* The hard frame pointer points to the top entry in the
15445 stack frame. The soft frame pointer to the bottom entry
15446 in the stack frame. If there is no stack frame at all,
15447 then they are identical. */
15449 return offsets
->frame
- offsets
->soft_frame
;
15451 case STACK_POINTER_REGNUM
:
15452 return offsets
->outgoing_args
- offsets
->soft_frame
;
15455 gcc_unreachable ();
15457 gcc_unreachable ();
15460 /* You cannot eliminate from the stack pointer.
15461 In theory you could eliminate from the hard frame
15462 pointer to the stack pointer, but this will never
15463 happen, since if a stack frame is not needed the
15464 hard frame pointer will never be used. */
15465 gcc_unreachable ();
15469 /* Given FROM and TO register numbers, say whether this elimination is
15470 allowed. Frame pointer elimination is automatically handled.
15472 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15473 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15474 pointer, we must eliminate FRAME_POINTER_REGNUM into
15475 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15476 ARG_POINTER_REGNUM. */
15479 arm_can_eliminate (const int from
, const int to
)
15481 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
15482 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
15483 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
15484 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
15488 /* Emit RTL to save coprocessor registers on function entry. Returns the
15489 number of bytes pushed. */
15492 arm_save_coproc_regs(void)
15494 int saved_size
= 0;
15496 unsigned start_reg
;
15499 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
15500 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
15502 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15503 insn
= gen_rtx_MEM (V2SImode
, insn
);
15504 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
15505 RTX_FRAME_RELATED_P (insn
) = 1;
15509 /* Save any floating point call-saved registers used by this
15511 if (TARGET_FPA_EMU2
)
15513 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15514 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15516 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15517 insn
= gen_rtx_MEM (XFmode
, insn
);
15518 insn
= emit_set_insn (insn
, gen_rtx_REG (XFmode
, reg
));
15519 RTX_FRAME_RELATED_P (insn
) = 1;
15525 start_reg
= LAST_FPA_REGNUM
;
15527 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15529 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15531 if (start_reg
- reg
== 3)
15533 insn
= emit_sfm (reg
, 4);
15534 RTX_FRAME_RELATED_P (insn
) = 1;
15536 start_reg
= reg
- 1;
15541 if (start_reg
!= reg
)
15543 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15544 RTX_FRAME_RELATED_P (insn
) = 1;
15545 saved_size
+= (start_reg
- reg
) * 12;
15547 start_reg
= reg
- 1;
15551 if (start_reg
!= reg
)
15553 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15554 saved_size
+= (start_reg
- reg
) * 12;
15555 RTX_FRAME_RELATED_P (insn
) = 1;
15558 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15560 start_reg
= FIRST_VFP_REGNUM
;
15562 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
15564 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
15565 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
15567 if (start_reg
!= reg
)
15568 saved_size
+= vfp_emit_fstmd (start_reg
,
15569 (reg
- start_reg
) / 2);
15570 start_reg
= reg
+ 2;
15573 if (start_reg
!= reg
)
15574 saved_size
+= vfp_emit_fstmd (start_reg
,
15575 (reg
- start_reg
) / 2);
15581 /* Set the Thumb frame pointer from the stack pointer. */
15584 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
15586 HOST_WIDE_INT amount
;
15589 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
15591 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15592 stack_pointer_rtx
, GEN_INT (amount
)));
15595 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
15596 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15597 expects the first two operands to be the same. */
15600 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15602 hard_frame_pointer_rtx
));
15606 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15607 hard_frame_pointer_rtx
,
15608 stack_pointer_rtx
));
15610 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
15611 plus_constant (stack_pointer_rtx
, amount
));
15612 RTX_FRAME_RELATED_P (dwarf
) = 1;
15613 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15616 RTX_FRAME_RELATED_P (insn
) = 1;
15619 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15622 arm_expand_prologue (void)
15627 unsigned long live_regs_mask
;
15628 unsigned long func_type
;
15630 int saved_pretend_args
= 0;
15631 int saved_regs
= 0;
15632 unsigned HOST_WIDE_INT args_to_push
;
15633 arm_stack_offsets
*offsets
;
15635 func_type
= arm_current_func_type ();
15637 /* Naked functions don't have prologues. */
15638 if (IS_NAKED (func_type
))
15641 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15642 args_to_push
= crtl
->args
.pretend_args_size
;
15644 /* Compute which register we will have to save onto the stack. */
15645 offsets
= arm_get_frame_offsets ();
15646 live_regs_mask
= offsets
->saved_regs_mask
;
15648 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
15650 if (IS_STACKALIGN (func_type
))
15655 /* Handle a word-aligned stack pointer. We generate the following:
15660 <save and restore r0 in normal prologue/epilogue>
15664 The unwinder doesn't need to know about the stack realignment.
15665 Just tell it we saved SP in r0. */
15666 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
15668 r0
= gen_rtx_REG (SImode
, 0);
15669 r1
= gen_rtx_REG (SImode
, 1);
15670 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15671 compiler won't choke. */
15672 dwarf
= gen_rtx_UNSPEC (SImode
, rtvec_alloc (0), UNSPEC_STACK_ALIGN
);
15673 dwarf
= gen_rtx_SET (VOIDmode
, r0
, dwarf
);
15674 insn
= gen_movsi (r0
, stack_pointer_rtx
);
15675 RTX_FRAME_RELATED_P (insn
) = 1;
15676 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15678 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
15679 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
15682 /* For APCS frames, if IP register is clobbered
15683 when creating frame, save that register in a special
15685 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
15687 if (IS_INTERRUPT (func_type
))
15689 /* Interrupt functions must not corrupt any registers.
15690 Creating a frame pointer however, corrupts the IP
15691 register, so we must push it first. */
15692 insn
= emit_multi_reg_push (1 << IP_REGNUM
);
15694 /* Do not set RTX_FRAME_RELATED_P on this insn.
15695 The dwarf stack unwinding code only wants to see one
15696 stack decrement per function, and this is not it. If
15697 this instruction is labeled as being part of the frame
15698 creation sequence then dwarf2out_frame_debug_expr will
15699 die when it encounters the assignment of IP to FP
15700 later on, since the use of SP here establishes SP as
15701 the CFA register and not IP.
15703 Anyway this instruction is not really part of the stack
15704 frame creation although it is part of the prologue. */
15706 else if (IS_NESTED (func_type
))
15708 /* The Static chain register is the same as the IP register
15709 used as a scratch register during stack frame creation.
15710 To get around this need to find somewhere to store IP
15711 whilst the frame is being created. We try the following
15714 1. The last argument register.
15715 2. A slot on the stack above the frame. (This only
15716 works if the function is not a varargs function).
15717 3. Register r3, after pushing the argument registers
15720 Note - we only need to tell the dwarf2 backend about the SP
15721 adjustment in the second variant; the static chain register
15722 doesn't need to be unwound, as it doesn't contain a value
15723 inherited from the caller. */
15725 if (df_regs_ever_live_p (3) == false)
15726 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
15727 else if (args_to_push
== 0)
15731 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15734 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
15735 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
15738 /* Just tell the dwarf backend that we adjusted SP. */
15739 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
15740 plus_constant (stack_pointer_rtx
,
15742 RTX_FRAME_RELATED_P (insn
) = 1;
15743 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15747 /* Store the args on the stack. */
15748 if (cfun
->machine
->uses_anonymous_args
)
15749 insn
= emit_multi_reg_push
15750 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
15753 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15754 GEN_INT (- args_to_push
)));
15756 RTX_FRAME_RELATED_P (insn
) = 1;
15758 saved_pretend_args
= 1;
15759 fp_offset
= args_to_push
;
15762 /* Now reuse r3 to preserve IP. */
15763 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
15767 insn
= emit_set_insn (ip_rtx
,
15768 plus_constant (stack_pointer_rtx
, fp_offset
));
15769 RTX_FRAME_RELATED_P (insn
) = 1;
15774 /* Push the argument registers, or reserve space for them. */
15775 if (cfun
->machine
->uses_anonymous_args
)
15776 insn
= emit_multi_reg_push
15777 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
15780 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15781 GEN_INT (- args_to_push
)));
15782 RTX_FRAME_RELATED_P (insn
) = 1;
15785 /* If this is an interrupt service routine, and the link register
15786 is going to be pushed, and we're not generating extra
15787 push of IP (needed when frame is needed and frame layout if apcs),
15788 subtracting four from LR now will mean that the function return
15789 can be done with a single instruction. */
15790 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
15791 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
15792 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
15795 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
15797 emit_set_insn (lr
, plus_constant (lr
, -4));
15800 if (live_regs_mask
)
15802 saved_regs
+= bit_count (live_regs_mask
) * 4;
15803 if (optimize_size
&& !frame_pointer_needed
15804 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
15806 /* If no coprocessor registers are being pushed and we don't have
15807 to worry about a frame pointer then push extra registers to
15808 create the stack frame. This is done is a way that does not
15809 alter the frame layout, so is independent of the epilogue. */
15813 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
15815 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
15816 if (frame
&& n
* 4 >= frame
)
15819 live_regs_mask
|= (1 << n
) - 1;
15820 saved_regs
+= frame
;
15823 insn
= emit_multi_reg_push (live_regs_mask
);
15824 RTX_FRAME_RELATED_P (insn
) = 1;
15827 if (! IS_VOLATILE (func_type
))
15828 saved_regs
+= arm_save_coproc_regs ();
15830 if (frame_pointer_needed
&& TARGET_ARM
)
15832 /* Create the new frame pointer. */
15833 if (TARGET_APCS_FRAME
)
15835 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
15836 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
15837 RTX_FRAME_RELATED_P (insn
) = 1;
15839 if (IS_NESTED (func_type
))
15841 /* Recover the static chain register. */
15842 if (!df_regs_ever_live_p (3)
15843 || saved_pretend_args
)
15844 insn
= gen_rtx_REG (SImode
, 3);
15845 else /* if (crtl->args.pretend_args_size == 0) */
15847 insn
= plus_constant (hard_frame_pointer_rtx
, 4);
15848 insn
= gen_frame_mem (SImode
, insn
);
15850 emit_set_insn (ip_rtx
, insn
);
15851 /* Add a USE to stop propagate_one_insn() from barfing. */
15852 emit_insn (gen_prologue_use (ip_rtx
));
15857 insn
= GEN_INT (saved_regs
- 4);
15858 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15859 stack_pointer_rtx
, insn
));
15860 RTX_FRAME_RELATED_P (insn
) = 1;
15864 if (flag_stack_usage_info
)
15865 current_function_static_stack_size
15866 = offsets
->outgoing_args
- offsets
->saved_args
;
15868 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
15870 /* This add can produce multiple insns for a large constant, so we
15871 need to get tricky. */
15872 rtx last
= get_last_insn ();
15874 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
15875 - offsets
->outgoing_args
);
15877 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15881 last
= last
? NEXT_INSN (last
) : get_insns ();
15882 RTX_FRAME_RELATED_P (last
) = 1;
15884 while (last
!= insn
);
15886 /* If the frame pointer is needed, emit a special barrier that
15887 will prevent the scheduler from moving stores to the frame
15888 before the stack adjustment. */
15889 if (frame_pointer_needed
)
15890 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
15891 hard_frame_pointer_rtx
));
15895 if (frame_pointer_needed
&& TARGET_THUMB2
)
15896 thumb_set_frame_pointer (offsets
);
15898 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
15900 unsigned long mask
;
15902 mask
= live_regs_mask
;
15903 mask
&= THUMB2_WORK_REGS
;
15904 if (!IS_NESTED (func_type
))
15905 mask
|= (1 << IP_REGNUM
);
15906 arm_load_pic_register (mask
);
15909 /* If we are profiling, make sure no instructions are scheduled before
15910 the call to mcount. Similarly if the user has requested no
15911 scheduling in the prolog. Similarly if we want non-call exceptions
15912 using the EABI unwinder, to prevent faulting instructions from being
15913 swapped with a stack adjustment. */
15914 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
15915 || (arm_except_unwind_info (&global_options
) == UI_TARGET
15916 && cfun
->can_throw_non_call_exceptions
))
15917 emit_insn (gen_blockage ());
15919 /* If the link register is being kept alive, with the return address in it,
15920 then make sure that it does not get reused by the ce2 pass. */
15921 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
15922 cfun
->machine
->lr_save_eliminated
= 1;
15925 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15927 arm_print_condition (FILE *stream
)
15929 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
15931 /* Branch conversion is not implemented for Thumb-2. */
15934 output_operand_lossage ("predicated Thumb instruction");
15937 if (current_insn_predicate
!= NULL
)
15939 output_operand_lossage
15940 ("predicated instruction in conditional sequence");
15944 fputs (arm_condition_codes
[arm_current_cc
], stream
);
15946 else if (current_insn_predicate
)
15948 enum arm_cond_code code
;
15952 output_operand_lossage ("predicated Thumb instruction");
15956 code
= get_arm_condition_code (current_insn_predicate
);
15957 fputs (arm_condition_codes
[code
], stream
);
/* NOTE(review): implementation of the TARGET_PRINT_OPERAND hook.  This
   region appears damaged by extraction: the `case' labels of the big
   switch on CODE (the %-letter operand modifiers), braces, `break'/`return'
   statements and the function's `static void' header seem to have been
   dropped, and leading numeric tokens look like fused original line
   numbers.  The code text below is kept byte-identical; only comments
   are added.  Each run of statements below presumably corresponds to one
   operand-modifier case — TODO confirm against the upstream source.  */
15962 /* If CODE is 'd', then the X is a condition operand and the instruction
15963 should only be executed if the condition is true.
15964 if CODE is 'D', then the X is a condition operand and the instruction
15965 should only be executed if the condition is false: however, if the mode
15966 of the comparison is CCFPEmode, then always execute the instruction -- we
15967 do this because in these circumstances !GE does not necessarily imply LT;
15968 in these cases the instruction pattern will take care to make sure that
15969 an instruction containing %d will follow, thereby undoing the effects of
15970 doing this instruction unconditionally.
15971 If CODE is 'N' then X is a floating point operand that must be negated
15973 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15974 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15976 arm_print_operand (FILE *stream
, rtx x
, int code
)
15981 fputs (ASM_COMMENT_START
, stream
);
15985 fputs (user_label_prefix
, stream
);
15989 fputs (REGISTER_PREFIX
, stream
);
15993 arm_print_condition (stream
);
15997 /* Nothing in unified syntax, otherwise the current condition code. */
15998 if (!TARGET_UNIFIED_ASM
)
15999 arm_print_condition (stream
);
16003 /* The current condition code in unified syntax, otherwise nothing. */
16004 if (TARGET_UNIFIED_ASM
)
16005 arm_print_condition (stream
);
16009 /* The current condition code for a condition code setting instruction.
16010 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16011 if (TARGET_UNIFIED_ASM
)
16013 fputc('s', stream
);
16014 arm_print_condition (stream
);
16018 arm_print_condition (stream
);
16019 fputc('s', stream
);
16024 /* If the instruction is conditionally executed then print
16025 the current condition code, otherwise print 's'. */
16026 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
16027 if (current_insn_predicate
)
16028 arm_print_condition (stream
);
16030 fputc('s', stream
);
16033 /* %# is a "break" sequence. It doesn't output anything, but is used to
16034 separate e.g. operand numbers from following text, if that text consists
16035 of further digits which we don't want to be part of the operand
/* Negate a floating-point constant operand (the 'N' modifier described
   in the block comment above).  */
16043 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
16044 r
= real_value_negate (&r
);
16045 fprintf (stream
, "%s", fp_const_from_val (&r
));
16049 /* An integer or symbol address without a preceding # sign. */
16051 switch (GET_CODE (x
))
16054 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
16058 output_addr_const (stream
, x
);
16062 gcc_unreachable ();
/* Bitwise-inverted constant (the 'B' modifier).  */
16067 if (GET_CODE (x
) == CONST_INT
)
16070 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
16071 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
16075 putc ('~', stream
);
16076 output_addr_const (stream
, x
);
16081 /* The low 16 bits of an immediate constant. */
16082 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
16086 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
16089 /* Truncate Cirrus shift counts. */
16091 if (GET_CODE (x
) == CONST_INT
)
16093 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 0x3f);
16096 arm_print_operand (stream
, x
, 0);
16100 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
/* Shift operand: emit ", <op> " followed by the shift amount.  */
16108 if (!shift_operator (x
, SImode
))
16110 output_operand_lossage ("invalid shift operand");
16114 shift
= shift_op (x
, &val
);
16118 fprintf (stream
, ", %s ", shift
);
16120 arm_print_operand (stream
, XEXP (x
, 1), 0);
16122 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
16127 /* An explanation of the 'Q', 'R' and 'H' register operands:
16129 In a pair of registers containing a DI or DF value the 'Q'
16130 operand returns the register number of the register containing
16131 the least significant part of the value. The 'R' operand returns
16132 the register number of the register containing the most
16133 significant part of the value.
16135 The 'H' operand returns the higher of the two register numbers.
16136 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16137 same as the 'Q' operand, since the most significant part of the
16138 value is held in the lower number register. The reverse is true
16139 on systems where WORDS_BIG_ENDIAN is false.
16141 The purpose of these operands is to distinguish between cases
16142 where the endian-ness of the values is important (for example
16143 when they are added together), and cases where the endian-ness
16144 is irrelevant, but the order of register operations is important.
16145 For example when loading a value from memory into a register
16146 pair, the endian-ness does not matter. Provided that the value
16147 from the lower memory address is put into the lower numbered
16148 register, and the value from the higher address is put into the
16149 higher numbered register, the load will work regardless of whether
16150 the value being loaded is big-wordian or little-wordian. The
16151 order of the two register loads can matter however, if the address
16152 of the memory location is actually held in one of the registers
16153 being overwritten by the load.
16155 The 'Q' and 'R' constraints are also available for 64-bit
16158 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
16160 rtx part
= gen_lowpart (SImode
, x
);
16161 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
16165 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16167 output_operand_lossage ("invalid operand for code '%c'", code
);
16171 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
16175 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
16177 enum machine_mode mode
= GET_MODE (x
);
16180 if (mode
== VOIDmode
)
16182 part
= gen_highpart_mode (SImode
, mode
, x
);
16183 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
16187 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16189 output_operand_lossage ("invalid operand for code '%c'", code
);
16193 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
16197 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16199 output_operand_lossage ("invalid operand for code '%c'", code
);
16203 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
16207 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16209 output_operand_lossage ("invalid operand for code '%c'", code
);
16213 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
16217 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16219 output_operand_lossage ("invalid operand for code '%c'", code
);
16223 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
16227 asm_fprintf (stream
, "%r",
16228 GET_CODE (XEXP (x
, 0)) == REG
16229 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
/* ldm/stm-style multi-register range (the 'M' modifier).  */
16233 asm_fprintf (stream
, "{%r-%r}",
16235 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
16238 /* Like 'M', but writing doubleword vector registers, for use by Neon
16242 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
16243 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
16245 asm_fprintf (stream
, "{d%d}", regno
);
16247 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
16252 /* CONST_TRUE_RTX means always -- that's the default. */
16253 if (x
== const_true_rtx
)
16256 if (!COMPARISON_P (x
))
16258 output_operand_lossage ("invalid operand for code '%c'", code
);
16262 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
16267 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16268 want to do that. */
16269 if (x
== const_true_rtx
)
16271 output_operand_lossage ("instruction never executed");
16274 if (!COMPARISON_P (x
))
16276 output_operand_lossage ("invalid operand for code '%c'", code
);
16280 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
16281 (get_arm_condition_code (x
))],
16285 /* Cirrus registers can be accessed in a variety of ways:
16286 single floating point (f)
16287 double floating point (d)
16289 64bit integer (dx). */
16290 case 'W': /* Cirrus register in F mode. */
16291 case 'X': /* Cirrus register in D mode. */
16292 case 'Y': /* Cirrus register in FX mode. */
16293 case 'Z': /* Cirrus register in DX mode. */
16294 gcc_assert (GET_CODE (x
) == REG
16295 && REGNO_REG_CLASS (REGNO (x
)) == CIRRUS_REGS
);
16297 fprintf (stream
, "mv%s%s",
16299 : code
== 'X' ? "d"
16300 : code
== 'Y' ? "fx" : "dx", reg_names
[REGNO (x
)] + 2);
16304 /* Print cirrus register in the mode specified by the register's mode. */
16307 int mode
= GET_MODE (x
);
16309 if (GET_CODE (x
) != REG
|| REGNO_REG_CLASS (REGNO (x
)) != CIRRUS_REGS
)
16311 output_operand_lossage ("invalid operand for code '%c'", code
);
16315 fprintf (stream
, "mv%s%s",
16316 mode
== DFmode
? "d"
16317 : mode
== SImode
? "fx"
16318 : mode
== DImode
? "dx"
16319 : "f", reg_names
[REGNO (x
)] + 2);
/* iWMMXt general-purpose (wCGR) register number.  */
16325 if (GET_CODE (x
) != REG
16326 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
16327 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
16328 /* Bad value for wCG register number. */
16330 output_operand_lossage ("invalid operand for code '%c'", code
);
16335 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
16338 /* Print an iWMMXt control register name. */
16340 if (GET_CODE (x
) != CONST_INT
16342 || INTVAL (x
) >= 16)
16343 /* Bad value for wC register number. */
16345 output_operand_lossage ("invalid operand for code '%c'", code
);
16351 static const char * wc_reg_names
[16] =
16353 "wCID", "wCon", "wCSSF", "wCASF",
16354 "wC4", "wC5", "wC6", "wC7",
16355 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16356 "wC12", "wC13", "wC14", "wC15"
16359 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
16363 /* Print the high single-precision register of a VFP double-precision
16367 int mode
= GET_MODE (x
);
16370 if (GET_MODE_SIZE (mode
) != 8 || GET_CODE (x
) != REG
)
16372 output_operand_lossage ("invalid operand for code '%c'", code
);
16377 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
16379 output_operand_lossage ("invalid operand for code '%c'", code
);
16383 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
16387 /* Print a VFP/Neon double precision or quad precision register name. */
16391 int mode
= GET_MODE (x
);
16392 int is_quad
= (code
== 'q');
16395 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
16397 output_operand_lossage ("invalid operand for code '%c'", code
);
16401 if (GET_CODE (x
) != REG
16402 || !IS_VFP_REGNUM (REGNO (x
)))
16404 output_operand_lossage ("invalid operand for code '%c'", code
);
16409 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
16410 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
16412 output_operand_lossage ("invalid operand for code '%c'", code
);
16416 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
16417 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
16421 /* These two codes print the low/high doubleword register of a Neon quad
16422 register, respectively. For pair-structure types, can also print
16423 low/high quadword registers. */
16427 int mode
= GET_MODE (x
);
16430 if ((GET_MODE_SIZE (mode
) != 16
16431 && GET_MODE_SIZE (mode
) != 32) || GET_CODE (x
) != REG
)
16433 output_operand_lossage ("invalid operand for code '%c'", code
);
16438 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
16440 output_operand_lossage ("invalid operand for code '%c'", code
);
16444 if (GET_MODE_SIZE (mode
) == 16)
16445 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
16446 + (code
== 'f' ? 1 : 0));
16448 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
16449 + (code
== 'f' ? 1 : 0));
16453 /* Print a VFPv3 floating-point constant, represented as an integer
16457 int index
= vfp3_const_double_index (x
);
16458 gcc_assert (index
!= -1);
16459 fprintf (stream
, "%d", index
);
16463 /* Print bits representing opcode features for Neon.
16465 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16466 and polynomials as unsigned.
16468 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16470 Bit 2 is 1 for rounding functions, 0 otherwise. */
16472 /* Identify the type as 's', 'u', 'p' or 'f'. */
16475 HOST_WIDE_INT bits
= INTVAL (x
);
16476 fputc ("uspf"[bits
& 3], stream
);
16480 /* Likewise, but signed and unsigned integers are both 'i'. */
16483 HOST_WIDE_INT bits
= INTVAL (x
);
16484 fputc ("iipf"[bits
& 3], stream
);
16488 /* As for 'T', but emit 'u' instead of 'p'. */
16491 HOST_WIDE_INT bits
= INTVAL (x
);
16492 fputc ("usuf"[bits
& 3], stream
);
16496 /* Bit 2: rounding (vs none). */
16499 HOST_WIDE_INT bits
= INTVAL (x
);
16500 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
16504 /* Memory operand for vld1/vst1 instruction. */
16508 bool postinc
= FALSE
;
16509 unsigned align
, memsize
, align_bits
;
16511 gcc_assert (GET_CODE (x
) == MEM
);
16512 addr
= XEXP (x
, 0);
16513 if (GET_CODE (addr
) == POST_INC
)
16516 addr
= XEXP (addr
, 0);
16518 asm_fprintf (stream
, "[%r", REGNO (addr
));
16520 /* We know the alignment of this access, so we can emit a hint in the
16521 instruction (for some alignments) as an aid to the memory subsystem
16523 align
= MEM_ALIGN (x
) >> 3;
16524 memsize
= INTVAL (MEM_SIZE (x
));
16526 /* Only certain alignment specifiers are supported by the hardware. */
16527 if (memsize
== 16 && (align
% 32) == 0)
16529 else if ((memsize
== 8 || memsize
== 16) && (align
% 16) == 0)
16531 else if ((align
% 8) == 0)
16536 if (align_bits
!= 0)
16537 asm_fprintf (stream
, ":%d", align_bits
);
16539 asm_fprintf (stream
, "]");
16542 fputs("!", stream
);
/* Simple register-indirect memory operand: "[rN]".  */
16550 gcc_assert (GET_CODE (x
) == MEM
);
16551 addr
= XEXP (x
, 0);
16552 gcc_assert (GET_CODE (addr
) == REG
);
16553 asm_fprintf (stream
, "[%r]", REGNO (addr
));
16557 /* Translate an S register number into a D register number and element index. */
16560 int mode
= GET_MODE (x
);
16563 if (GET_MODE_SIZE (mode
) != 4 || GET_CODE (x
) != REG
)
16565 output_operand_lossage ("invalid operand for code '%c'", code
);
16570 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16572 output_operand_lossage ("invalid operand for code '%c'", code
);
16576 regno
= regno
- FIRST_VFP_REGNUM
;
16577 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
16581 /* Register specifier for vld1.16/vst1.16. Translate the S register
16582 number into a D register number and element index. */
16585 int mode
= GET_MODE (x
);
16588 if (GET_MODE_SIZE (mode
) != 2 || GET_CODE (x
) != REG
)
16590 output_operand_lossage ("invalid operand for code '%c'", code
);
16595 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16597 output_operand_lossage ("invalid operand for code '%c'", code
);
16601 regno
= regno
- FIRST_VFP_REGNUM
;
16602 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
/* Default case: no modifier letter.  Print the operand according to
   its rtx code.  */
16609 output_operand_lossage ("missing operand");
16613 switch (GET_CODE (x
))
16616 asm_fprintf (stream
, "%r", REGNO (x
));
16620 output_memory_reference_mode
= GET_MODE (x
);
16621 output_address (XEXP (x
, 0));
16628 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
16629 sizeof (fpstr
), 0, 1);
16630 fprintf (stream
, "#%s", fpstr
);
16633 fprintf (stream
, "#%s", fp_immediate_constant (x
));
16637 gcc_assert (GET_CODE (x
) != NEG
);
16638 fputc ('#', stream
);
16639 if (GET_CODE (x
) == HIGH
)
16641 fputs (":lower16:", stream
);
16645 output_addr_const (stream
, x
);
/* NOTE(review): implementation of the TARGET_PRINT_OPERAND_ADDRESS hook.
   This region appears damaged by extraction — braces, the function header
   (`static void') and some branch structure seem to be missing, and the
   leading numeric tokens look like fused original line numbers.  Code
   text is kept byte-identical; only comments are added.  The first half
   presumably handles 32-bit (ARM/Thumb-2) addresses and the tail
   (from "if (GET_CODE (x) == REG) ... [%r]") Thumb-1 addresses —
   TODO confirm against the upstream source.  */
16651 /* Target hook for printing a memory address. */
16653 arm_print_operand_address (FILE *stream
, rtx x
)
16657 int is_minus
= GET_CODE (x
) == MINUS
;
16659 if (GET_CODE (x
) == REG
)
16660 asm_fprintf (stream
, "[%r, #0]", REGNO (x
));
16661 else if (GET_CODE (x
) == PLUS
|| is_minus
)
16663 rtx base
= XEXP (x
, 0);
16664 rtx index
= XEXP (x
, 1);
16665 HOST_WIDE_INT offset
= 0;
16666 if (GET_CODE (base
) != REG
16667 || (GET_CODE (index
) == REG
&& REGNO (index
) == SP_REGNUM
))
16669 /* Ensure that BASE is a register. */
16670 /* (one of them must be). */
16671 /* Also ensure the SP is not used as an index register. */
16676 switch (GET_CODE (index
))
16679 offset
= INTVAL (index
);
16682 asm_fprintf (stream
, "[%r, #%wd]",
16683 REGNO (base
), offset
);
16687 asm_fprintf (stream
, "[%r, %s%r]",
16688 REGNO (base
), is_minus
? "-" : "",
/* Shifted-index addressing: print the shift via operand code 'S'.  */
16698 asm_fprintf (stream
, "[%r, %s%r",
16699 REGNO (base
), is_minus
? "-" : "",
16700 REGNO (XEXP (index
, 0)));
16701 arm_print_operand (stream
, index
, 'S');
16702 fputs ("]", stream
);
16707 gcc_unreachable ();
16710 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
16711 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
16713 extern enum machine_mode output_memory_reference_mode
;
16715 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
16717 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
16718 asm_fprintf (stream
, "[%r, #%s%d]!",
16719 REGNO (XEXP (x
, 0)),
16720 GET_CODE (x
) == PRE_DEC
? "-" : "",
16721 GET_MODE_SIZE (output_memory_reference_mode
));
16723 asm_fprintf (stream
, "[%r], #%s%d",
16724 REGNO (XEXP (x
, 0)),
16725 GET_CODE (x
) == POST_DEC
? "-" : "",
16726 GET_MODE_SIZE (output_memory_reference_mode
));
16728 else if (GET_CODE (x
) == PRE_MODIFY
)
16730 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
16731 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
16732 asm_fprintf (stream
, "#%wd]!",
16733 INTVAL (XEXP (XEXP (x
, 1), 1)));
16735 asm_fprintf (stream
, "%r]!",
16736 REGNO (XEXP (XEXP (x
, 1), 1)));
16738 else if (GET_CODE (x
) == POST_MODIFY
)
16740 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
16741 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
16742 asm_fprintf (stream
, "#%wd",
16743 INTVAL (XEXP (XEXP (x
, 1), 1)));
16745 asm_fprintf (stream
, "%r",
16746 REGNO (XEXP (XEXP (x
, 1), 1)));
16748 else output_addr_const (stream
, x
);
16752 if (GET_CODE (x
) == REG
)
16753 asm_fprintf (stream
, "[%r]", REGNO (x
));
16754 else if (GET_CODE (x
) == POST_INC
)
16755 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
16756 else if (GET_CODE (x
) == PLUS
)
16758 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
16759 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
16760 asm_fprintf (stream
, "[%r, #%wd]",
16761 REGNO (XEXP (x
, 0)),
16762 INTVAL (XEXP (x
, 1)));
16764 asm_fprintf (stream
, "[%r, %r]",
16765 REGNO (XEXP (x
, 0)),
16766 REGNO (XEXP (x
, 1)));
16769 output_addr_const (stream
, x
);
/* NOTE(review): implementation of TARGET_PRINT_OPERAND_PUNCT_VALID_P.
   The `static bool' header line and braces appear lost in extraction;
   code text kept byte-identical, comments only.
   Returns true for the punctuation operand codes handled by
   arm_print_operand above ('@', '|', '.', '(', ')', '#', and the
   target-conditional '?', '!', '_').  */
16773 /* Target hook for indicating whether a punctuation character for
16774 TARGET_PRINT_OPERAND is valid. */
16776 arm_print_operand_punct_valid_p (unsigned char code
)
16778 return (code
== '@' || code
== '|' || code
== '.'
16779 || code
== '(' || code
== ')' || code
== '#'
16780 || (TARGET_32BIT
&& (code
== '?'))
16781 || (TARGET_THUMB2
&& (code
== '!'))
16782 || (TARGET_THUMB
&& (code
== '_')));
/* NOTE(review): this region appears damaged by extraction — braces, the
   `static bool' header, local declarations and some statements (e.g. the
   assemble_integer/assemble_real calls whose argument lists remain below)
   seem truncated; leading numeric tokens look like fused original line
   numbers.  Code text kept byte-identical; only comments added.  */
16785 /* Target hook for assembling integer objects. The ARM version needs to
16786 handle word-sized values specially. */
16788 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
16790 enum machine_mode mode
;
/* Aligned word-sized values: emit a .word directive, with a (GOT) or
   (GOTOFF) relocation suffix for PIC symbols in the constant table.  */
16792 if (size
== UNITS_PER_WORD
&& aligned_p
)
16794 fputs ("\t.word\t", asm_out_file
);
16795 output_addr_const (asm_out_file
, x
);
16797 /* Mark symbols as position independent. We only do this in the
16798 .text segment, not in the .data segment. */
16799 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
16800 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
16802 /* See legitimize_pic_address for an explanation of the
16803 TARGET_VXWORKS_RTP check. */
16804 if (TARGET_VXWORKS_RTP
16805 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
16806 fputs ("(GOT)", asm_out_file
);
16808 fputs ("(GOTOFF)", asm_out_file
);
16810 fputc ('\n', asm_out_file
);
/* Vector constants: assemble element by element.  */
16814 mode
= GET_MODE (x
);
16816 if (arm_vector_mode_supported_p (mode
))
16820 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
16822 units
= CONST_VECTOR_NUNITS (x
);
16823 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
16825 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16826 for (i
= 0; i
< units
; i
++)
16828 rtx elt
= CONST_VECTOR_ELT (x
, i
);
16830 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
16833 for (i
= 0; i
< units
; i
++)
16835 rtx elt
= CONST_VECTOR_ELT (x
, i
);
16836 REAL_VALUE_TYPE rval
;
16838 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
16841 (rval
, GET_MODE_INNER (mode
),
16842 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
/* Everything else is handled by the generic code.  */
16848 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit a static constructor or destructor entry for SYMBOL with the
   given PRIORITY; IS_CTOR selects .init_array vs .fini_array.
   NOTE(review): the introductory comment, `static void' header, braces
   and some local declarations (the section variable and the buffer used
   by sprintf below) appear lost in extraction; leading numeric tokens
   look like fused original line numbers.  Code text kept byte-identical;
   only comments added.  */
16852 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
/* Non-AAPCS targets use the default .ctors/.dtors mechanism.  */
16856 if (!TARGET_AAPCS_BASED
)
16859 default_named_section_asm_out_constructor
16860 : default_named_section_asm_out_destructor
) (symbol
, priority
);
16864 /* Put these in the .init_array section, using a special relocation. */
16865 if (priority
!= DEFAULT_INIT_PRIORITY
)
16868 sprintf (buf
, "%s.%.5u",
16869 is_ctor
? ".init_array" : ".fini_array",
16871 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
16878 switch_to_section (s
);
16879 assemble_align (POINTER_SIZE
);
16880 fputs ("\t.word\t", asm_out_file
);
16881 output_addr_const (asm_out_file
, symbol
);
/* R_ARM_TARGET1 relocation, resolved by the linker as ABS32 or REL32.  */
16882 fputs ("(target1)\n", asm_out_file
);
/* Thin wrapper over arm_elf_asm_cdtor with is_ctor == true.
   NOTE(review): `static void' header and braces appear lost in
   extraction; code kept byte-identical.  */
16885 /* Add a function to the list of static constructors. */
16888 arm_elf_asm_constructor (rtx symbol
, int priority
)
16890 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
/* Thin wrapper over arm_elf_asm_cdtor with is_ctor == false.
   NOTE(review): `static void' header and braces appear lost in
   extraction; code kept byte-identical.  */
16893 /* Add a function to the list of static destructors. */
16896 arm_elf_asm_destructor (rtx symbol
, int priority
)
16898 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
16901 /* A finite state machine takes care of noticing whether or not instructions
16902 can be conditionally executed, and thus decrease execution time and code
16903 size by deleting branch instructions. The fsm is controlled by
16904 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16906 /* The state of the fsm controlling condition codes are:
16907 0: normal, do nothing special
16908 1: make ASM_OUTPUT_OPCODE not output this instruction
16909 2: make ASM_OUTPUT_OPCODE not output this instruction
16910 3: make instructions conditional
16911 4: make instructions conditional
16913 State transitions (state->state by whom under condition):
16914 0 -> 1 final_prescan_insn if the `target' is a label
16915 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16916 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16917 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16918 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16919 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16920 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16921 (the target insn is arm_target_insn).
16923 If the jump clobbers the conditions then we use states 2 and 4.
16925 A similar thing can be done with conditional return insns.
16927 XXX In case the `target' is an unconditional branch, this conditionalising
16928 of the instructions always reduces code size, but not always execution
16929 time. But then, I want to reduce the code size to somewhere near what
16930 /bin/cc produces. */
16932 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16933 instructions. When a COND_EXEC instruction is seen the subsequent
16934 instructions are scanned so that multiple conditional instructions can be
16935 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16936 specify the length and true/false mask for the IT block. These will be
16937 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
/* NOTE(review): this region appears damaged by extraction — the outer
   `switch (mode)' statement and its per-CC-mode `case' labels (other
   than the CC_D* ones), the `dominance:' label, braces and `break'
   lines seem to be missing; leading numeric tokens look like fused
   original line numbers.  Code text kept byte-identical; only comments
   added.  Each run of `case <rtx_code>: return ARM_*;' lines below
   presumably belongs to one CC mode of the lost outer switch — TODO
   confirm against the upstream source.  */
16939 /* Returns the index of the ARM condition code string in
16940 `arm_condition_codes'. COMPARISON should be an rtx like
16941 `(eq (...) (...))'. */
16942 static enum arm_cond_code
16943 get_arm_condition_code (rtx comparison
)
16945 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
16946 enum arm_cond_code code
;
16947 enum rtx_code comp_code
= GET_CODE (comparison
);
/* If the operand carries no CC mode, derive one from the comparison.  */
16949 if (GET_MODE_CLASS (mode
) != MODE_CC
)
16950 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
16951 XEXP (comparison
, 1));
/* Dominance CC modes: the condition is encoded in the mode itself;
   an EQ comparison inverts it.  */
16955 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
16956 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
16957 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
16958 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
16959 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
16960 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
16961 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
16962 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
16963 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
16964 case CC_DLTUmode
: code
= ARM_CC
;
16967 gcc_assert (comp_code
== EQ
|| comp_code
== NE
);
16969 if (comp_code
== EQ
)
16970 return ARM_INVERSE_CONDITION_CODE (code
);
16976 case NE
: return ARM_NE
;
16977 case EQ
: return ARM_EQ
;
16978 case GE
: return ARM_PL
;
16979 case LT
: return ARM_MI
;
16980 default: gcc_unreachable ();
16986 case NE
: return ARM_NE
;
16987 case EQ
: return ARM_EQ
;
16988 default: gcc_unreachable ();
16994 case NE
: return ARM_MI
;
16995 case EQ
: return ARM_PL
;
16996 default: gcc_unreachable ();
17001 /* These encodings assume that AC=1 in the FPA system control
17002 byte. This allows us to handle all cases except UNEQ and
17006 case GE
: return ARM_GE
;
17007 case GT
: return ARM_GT
;
17008 case LE
: return ARM_LS
;
17009 case LT
: return ARM_MI
;
17010 case NE
: return ARM_NE
;
17011 case EQ
: return ARM_EQ
;
17012 case ORDERED
: return ARM_VC
;
17013 case UNORDERED
: return ARM_VS
;
17014 case UNLT
: return ARM_LT
;
17015 case UNLE
: return ARM_LE
;
17016 case UNGT
: return ARM_HI
;
17017 case UNGE
: return ARM_PL
;
17018 /* UNEQ and LTGT do not have a representation. */
17019 case UNEQ
: /* Fall through. */
17020 case LTGT
: /* Fall through. */
17021 default: gcc_unreachable ();
/* Swapped-operand CC mode: conditions are mirrored.  */
17027 case NE
: return ARM_NE
;
17028 case EQ
: return ARM_EQ
;
17029 case GE
: return ARM_LE
;
17030 case GT
: return ARM_LT
;
17031 case LE
: return ARM_GE
;
17032 case LT
: return ARM_GT
;
17033 case GEU
: return ARM_LS
;
17034 case GTU
: return ARM_CC
;
17035 case LEU
: return ARM_CS
;
17036 case LTU
: return ARM_HI
;
17037 default: gcc_unreachable ();
17043 case LTU
: return ARM_CS
;
17044 case GEU
: return ARM_CC
;
17045 default: gcc_unreachable ();
17051 case NE
: return ARM_NE
;
17052 case EQ
: return ARM_EQ
;
17053 case GEU
: return ARM_CS
;
17054 case GTU
: return ARM_HI
;
17055 case LEU
: return ARM_LS
;
17056 case LTU
: return ARM_CC
;
17057 default: gcc_unreachable ();
17063 case GE
: return ARM_GE
;
17064 case LT
: return ARM_LT
;
17065 case GEU
: return ARM_CS
;
17066 case LTU
: return ARM_CC
;
17067 default: gcc_unreachable ();
/* Plain CCmode: the full set of conditions is available.  */
17073 case NE
: return ARM_NE
;
17074 case EQ
: return ARM_EQ
;
17075 case GE
: return ARM_GE
;
17076 case GT
: return ARM_GT
;
17077 case LE
: return ARM_LE
;
17078 case LT
: return ARM_LT
;
17079 case GEU
: return ARM_CS
;
17080 case GTU
: return ARM_HI
;
17081 case LEU
: return ARM_LS
;
17082 case LTU
: return ARM_CC
;
17083 default: gcc_unreachable ();
17086 default: gcc_unreachable ();
/* NOTE(review): this region appears damaged by extraction — braces, the
   `static void' header, some local declarations (predicate, n, mask)
   and loop structure seem to be missing; leading numeric tokens look
   like fused original line numbers.  Code text kept byte-identical;
   only comments added.  Sets up arm_current_cc / arm_condexec_mask /
   arm_condexec_masklen / arm_condexec_count for IT-block emission.  */
17090 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17093 thumb2_final_prescan_insn (rtx insn
)
17095 rtx first_insn
= insn
;
17096 rtx body
= PATTERN (insn
);
17098 enum arm_cond_code code
;
17102 /* Remove the previous insn from the count of insns to be output. */
17103 if (arm_condexec_count
)
17104 arm_condexec_count
--;
17106 /* Nothing to do if we are already inside a conditional block. */
17107 if (arm_condexec_count
)
17110 if (GET_CODE (body
) != COND_EXEC
)
17113 /* Conditional jumps are implemented directly. */
17114 if (GET_CODE (insn
) == JUMP_INSN
)
/* Start a new IT block from this COND_EXEC insn.  */
17117 predicate
= COND_EXEC_TEST (body
);
17118 arm_current_cc
= get_arm_condition_code (predicate
);
17120 n
= get_attr_ce_count (insn
);
17121 arm_condexec_count
= 1;
17122 arm_condexec_mask
= (1 << n
) - 1;
17123 arm_condexec_masklen
= n
;
17124 /* See if subsequent instructions can be combined into the same block. */
17127 insn
= next_nonnote_insn (insn
);
17129 /* Jumping into the middle of an IT block is illegal, so a label or
17130 barrier terminates the block. */
17131 if (GET_CODE (insn
) != INSN
&& GET_CODE(insn
) != JUMP_INSN
)
17134 body
= PATTERN (insn
);
17135 /* USE and CLOBBER aren't really insns, so just skip them. */
17136 if (GET_CODE (body
) == USE
17137 || GET_CODE (body
) == CLOBBER
)
17140 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17141 if (GET_CODE (body
) != COND_EXEC
)
17143 /* Allow up to 4 conditionally executed instructions in a block. */
17144 n
= get_attr_ce_count (insn
);
17145 if (arm_condexec_masklen
+ n
> 4)
17148 predicate
= COND_EXEC_TEST (body
);
17149 code
= get_arm_condition_code (predicate
);
17150 mask
= (1 << n
) - 1;
/* Same condition extends the block; the inverse condition adds
   else-slots; any other condition cannot join the block.  */
17151 if (arm_current_cc
== code
)
17152 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
17153 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
17156 arm_condexec_count
++;
17157 arm_condexec_masklen
+= n
;
17159 /* A jump must be the last instruction in a conditional block. */
17160 if (GET_CODE(insn
) == JUMP_INSN
)
17163 /* Restore recog_data (getting the attributes of other insns can
17164 destroy this array, but final.c assumes that it remains intact
17165 across this call). */
17166 extract_constrain_insn_cached (first_insn
);
17170 arm_final_prescan_insn (rtx insn
)
17172 /* BODY will hold the body of INSN. */
17173 rtx body
= PATTERN (insn
);
17175 /* This will be 1 if trying to repeat the trick, and things need to be
17176 reversed if it appears to fail. */
17179 /* If we start with a return insn, we only succeed if we find another one. */
17180 int seeking_return
= 0;
17182 /* START_INSN will hold the insn from where we start looking. This is the
17183 first insn after the following code_label if REVERSE is true. */
17184 rtx start_insn
= insn
;
17186 /* If in state 4, check if the target branch is reached, in order to
17187 change back to state 0. */
17188 if (arm_ccfsm_state
== 4)
17190 if (insn
== arm_target_insn
)
17192 arm_target_insn
= NULL
;
17193 arm_ccfsm_state
= 0;
17198 /* If in state 3, it is possible to repeat the trick, if this insn is an
17199 unconditional branch to a label, and immediately following this branch
17200 is the previous target label which is only used once, and the label this
17201 branch jumps to is not too far off. */
17202 if (arm_ccfsm_state
== 3)
17204 if (simplejump_p (insn
))
17206 start_insn
= next_nonnote_insn (start_insn
);
17207 if (GET_CODE (start_insn
) == BARRIER
)
17209 /* XXX Isn't this always a barrier? */
17210 start_insn
= next_nonnote_insn (start_insn
);
17212 if (GET_CODE (start_insn
) == CODE_LABEL
17213 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17214 && LABEL_NUSES (start_insn
) == 1)
17219 else if (GET_CODE (body
) == RETURN
)
17221 start_insn
= next_nonnote_insn (start_insn
);
17222 if (GET_CODE (start_insn
) == BARRIER
)
17223 start_insn
= next_nonnote_insn (start_insn
);
17224 if (GET_CODE (start_insn
) == CODE_LABEL
17225 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17226 && LABEL_NUSES (start_insn
) == 1)
17229 seeking_return
= 1;
17238 gcc_assert (!arm_ccfsm_state
|| reverse
);
17239 if (GET_CODE (insn
) != JUMP_INSN
)
17242 /* This jump might be paralleled with a clobber of the condition codes
17243 the jump should always come first */
17244 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
17245 body
= XVECEXP (body
, 0, 0);
17248 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
17249 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
17252 int fail
= FALSE
, succeed
= FALSE
;
17253 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17254 int then_not_else
= TRUE
;
17255 rtx this_insn
= start_insn
, label
= 0;
17257 /* Register the insn jumped to. */
17260 if (!seeking_return
)
17261 label
= XEXP (SET_SRC (body
), 0);
17263 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
17264 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
17265 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
17267 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
17268 then_not_else
= FALSE
;
17270 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == RETURN
)
17271 seeking_return
= 1;
17272 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == RETURN
)
17274 seeking_return
= 1;
17275 then_not_else
= FALSE
;
17278 gcc_unreachable ();
17280 /* See how many insns this branch skips, and what kind of insns. If all
17281 insns are okay, and the label or unconditional branch to the same
17282 label is not too far away, succeed. */
17283 for (insns_skipped
= 0;
17284 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
17288 this_insn
= next_nonnote_insn (this_insn
);
17292 switch (GET_CODE (this_insn
))
17295 /* Succeed if it is the target label, otherwise fail since
17296 control falls in from somewhere else. */
17297 if (this_insn
== label
)
17299 arm_ccfsm_state
= 1;
17307 /* Succeed if the following insn is the target label.
17309 If return insns are used then the last insn in a function
17310 will be a barrier. */
17311 this_insn
= next_nonnote_insn (this_insn
);
17312 if (this_insn
&& this_insn
== label
)
17314 arm_ccfsm_state
= 1;
17322 /* The AAPCS says that conditional calls should not be
17323 used since they make interworking inefficient (the
17324 linker can't transform BL<cond> into BLX). That's
17325 only a problem if the machine has BLX. */
17332 /* Succeed if the following insn is the target label, or
17333 if the following two insns are a barrier and the
17335 this_insn
= next_nonnote_insn (this_insn
);
17336 if (this_insn
&& GET_CODE (this_insn
) == BARRIER
)
17337 this_insn
= next_nonnote_insn (this_insn
);
17339 if (this_insn
&& this_insn
== label
17340 && insns_skipped
< max_insns_skipped
)
17342 arm_ccfsm_state
= 1;
17350 /* If this is an unconditional branch to the same label, succeed.
17351 If it is to another label, do nothing. If it is conditional,
17353 /* XXX Probably, the tests for SET and the PC are
17356 scanbody
= PATTERN (this_insn
);
17357 if (GET_CODE (scanbody
) == SET
17358 && GET_CODE (SET_DEST (scanbody
)) == PC
)
17360 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
17361 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
17363 arm_ccfsm_state
= 2;
17366 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
17369 /* Fail if a conditional return is undesirable (e.g. on a
17370 StrongARM), but still allow this if optimizing for size. */
17371 else if (GET_CODE (scanbody
) == RETURN
17372 && !use_return_insn (TRUE
, NULL
)
17375 else if (GET_CODE (scanbody
) == RETURN
17378 arm_ccfsm_state
= 2;
17381 else if (GET_CODE (scanbody
) == PARALLEL
)
17383 switch (get_attr_conds (this_insn
))
17393 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
17398 /* Instructions using or affecting the condition codes make it
17400 scanbody
= PATTERN (this_insn
);
17401 if (!(GET_CODE (scanbody
) == SET
17402 || GET_CODE (scanbody
) == PARALLEL
)
17403 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
17406 /* A conditional cirrus instruction must be followed by
17407 a non Cirrus instruction. However, since we
17408 conditionalize instructions in this function and by
17409 the time we get here we can't add instructions
17410 (nops), because shorten_branches() has already been
17411 called, we will disable conditionalizing Cirrus
17412 instructions to be safe. */
17413 if (GET_CODE (scanbody
) != USE
17414 && GET_CODE (scanbody
) != CLOBBER
17415 && get_attr_cirrus (this_insn
) != CIRRUS_NOT
)
17425 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
17426 arm_target_label
= CODE_LABEL_NUMBER (label
);
17429 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
17431 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
17433 this_insn
= next_nonnote_insn (this_insn
);
17434 gcc_assert (!this_insn
17435 || (GET_CODE (this_insn
) != BARRIER
17436 && GET_CODE (this_insn
) != CODE_LABEL
));
17440 /* Oh, dear! we ran off the end.. give up. */
17441 extract_constrain_insn_cached (insn
);
17442 arm_ccfsm_state
= 0;
17443 arm_target_insn
= NULL
;
17446 arm_target_insn
= this_insn
;
17449 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17452 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
17454 if (reverse
|| then_not_else
)
17455 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
17458 /* Restore recog_data (getting the attributes of other insns can
17459 destroy this array, but final.c assumes that it remains intact
17460 across this call. */
17461 extract_constrain_insn_cached (insn
);
17465 /* Output IT instructions. */
17467 thumb2_asm_output_opcode (FILE * stream
)
17472 if (arm_condexec_mask
)
17474 for (n
= 0; n
< arm_condexec_masklen
; n
++)
17475 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
17477 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
17478 arm_condition_codes
[arm_current_cc
]);
17479 arm_condexec_mask
= 0;
17483 /* Returns true if REGNO is a valid register
17484 for holding a quantity of type MODE. */
17486 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
17488 if (GET_MODE_CLASS (mode
) == MODE_CC
)
17489 return (regno
== CC_REGNUM
17490 || (TARGET_HARD_FLOAT
&& TARGET_VFP
17491 && regno
== VFPCC_REGNUM
));
17494 /* For the Thumb we only allow values bigger than SImode in
17495 registers 0 - 6, so that there is always a second low
17496 register available to hold the upper part of the value.
17497 We probably we ought to ensure that the register is the
17498 start of an even numbered register pair. */
17499 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
17501 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
17502 && IS_CIRRUS_REGNUM (regno
))
17503 /* We have outlawed SI values in Cirrus registers because they
17504 reside in the lower 32 bits, but SF values reside in the
17505 upper 32 bits. This causes gcc all sorts of grief. We can't
17506 even split the registers into pairs because Cirrus SI values
17507 get sign extended to 64bits-- aldyh. */
17508 return (GET_MODE_CLASS (mode
) == MODE_FLOAT
) || (mode
== DImode
);
17510 if (TARGET_HARD_FLOAT
&& TARGET_VFP
17511 && IS_VFP_REGNUM (regno
))
17513 if (mode
== SFmode
|| mode
== SImode
)
17514 return VFP_REGNO_OK_FOR_SINGLE (regno
);
17516 if (mode
== DFmode
)
17517 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
17519 /* VFP registers can hold HFmode values, but there is no point in
17520 putting them there unless we have hardware conversion insns. */
17521 if (mode
== HFmode
)
17522 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
17525 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
17526 || (VALID_NEON_QREG_MODE (mode
)
17527 && NEON_REGNO_OK_FOR_QUAD (regno
))
17528 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
17529 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
17530 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
17531 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
17532 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
17537 if (TARGET_REALLY_IWMMXT
)
17539 if (IS_IWMMXT_GR_REGNUM (regno
))
17540 return mode
== SImode
;
17542 if (IS_IWMMXT_REGNUM (regno
))
17543 return VALID_IWMMXT_REG_MODE (mode
);
17546 /* We allow almost any value to be stored in the general registers.
17547 Restrict doubleword quantities to even register pairs so that we can
17548 use ldrd. Do not allow very large Neon structure opaque modes in
17549 general registers; they would use too many. */
17550 if (regno
<= LAST_ARM_REGNUM
)
17551 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
17552 && ARM_NUM_REGS (mode
) <= 4;
17554 if (regno
== FRAME_POINTER_REGNUM
17555 || regno
== ARG_POINTER_REGNUM
)
17556 /* We only allow integers in the fake hard registers. */
17557 return GET_MODE_CLASS (mode
) == MODE_INT
;
17559 /* The only registers left are the FPA registers
17560 which we only allow to hold FP values. */
17561 return (TARGET_HARD_FLOAT
&& TARGET_FPA
17562 && GET_MODE_CLASS (mode
) == MODE_FLOAT
17563 && regno
>= FIRST_FPA_REGNUM
17564 && regno
<= LAST_FPA_REGNUM
);
17567 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17568 not used in arm mode. */
17571 arm_regno_class (int regno
)
17575 if (regno
== STACK_POINTER_REGNUM
)
17577 if (regno
== CC_REGNUM
)
17584 if (TARGET_THUMB2
&& regno
< 8)
17587 if ( regno
<= LAST_ARM_REGNUM
17588 || regno
== FRAME_POINTER_REGNUM
17589 || regno
== ARG_POINTER_REGNUM
)
17590 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
17592 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
17593 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
17595 if (IS_CIRRUS_REGNUM (regno
))
17596 return CIRRUS_REGS
;
17598 if (IS_VFP_REGNUM (regno
))
17600 if (regno
<= D7_VFP_REGNUM
)
17601 return VFP_D0_D7_REGS
;
17602 else if (regno
<= LAST_LO_VFP_REGNUM
)
17603 return VFP_LO_REGS
;
17605 return VFP_HI_REGS
;
17608 if (IS_IWMMXT_REGNUM (regno
))
17609 return IWMMXT_REGS
;
17611 if (IS_IWMMXT_GR_REGNUM (regno
))
17612 return IWMMXT_GR_REGS
;
17617 /* Handle a special case when computing the offset
17618 of an argument from the frame pointer. */
17620 arm_debugger_arg_offset (int value
, rtx addr
)
17624 /* We are only interested if dbxout_parms() failed to compute the offset. */
17628 /* We can only cope with the case where the address is held in a register. */
17629 if (GET_CODE (addr
) != REG
)
17632 /* If we are using the frame pointer to point at the argument, then
17633 an offset of 0 is correct. */
17634 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
17637 /* If we are using the stack pointer to point at the
17638 argument, then an offset of 0 is correct. */
17639 /* ??? Check this is consistent with thumb2 frame layout. */
17640 if ((TARGET_THUMB
|| !frame_pointer_needed
)
17641 && REGNO (addr
) == SP_REGNUM
)
17644 /* Oh dear. The argument is pointed to by a register rather
17645 than being held in a register, or being stored at a known
17646 offset from the frame pointer. Since GDB only understands
17647 those two kinds of argument we must translate the address
17648 held in the register into an offset from the frame pointer.
17649 We do this by searching through the insns for the function
17650 looking to see where this register gets its value. If the
17651 register is initialized from the frame pointer plus an offset
17652 then we are in luck and we can continue, otherwise we give up.
17654 This code is exercised by producing debugging information
17655 for a function with arguments like this:
17657 double func (double a, double b, int c, double d) {return d;}
17659 Without this code the stab for parameter 'd' will be set to
17660 an offset of 0 from the frame pointer, rather than 8. */
17662 /* The if() statement says:
17664 If the insn is a normal instruction
17665 and if the insn is setting the value in a register
17666 and if the register being set is the register holding the address of the argument
17667 and if the address is computing by an addition
17668 that involves adding to a register
17669 which is the frame pointer
17674 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
17676 if ( GET_CODE (insn
) == INSN
17677 && GET_CODE (PATTERN (insn
)) == SET
17678 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
17679 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
17680 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 0)) == REG
17681 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17682 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 1)) == CONST_INT
17685 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
17694 warning (0, "unable to compute real location of stacked parameter");
17695 value
= 8; /* XXX magic hack */
17715 T_MAX
/* Size of enum. Keep last. */
17716 } neon_builtin_type_mode
;
/* One bit per neon_builtin_type_mode value, for building mode masks.  */
#define TYPE_MODE_BIT(X) (1 << (X))

/* Mask of all 64-bit (D-register) vector modes.  */
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI)  \
                 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
                 | TYPE_MODE_BIT (T_DI))
/* Mask of all 128-bit (Q-register) vector modes.  */
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
                 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
                 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))

/* Lowercase mode-suffix -> T_* enumerator mappings, consumed via UP().  */
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
/* NOTE(review): di_UP restored -- it was elided in the mangled original
   (gap at orig line 17731) but the builtin table below uses 'di' entries,
   which expand through UP(di) -> di_UP.  Further elided mappings
   (e.g. ti_UP/ei_UP/oi_UP, orig lines 17737-17740) may also exist --
   confirm against upstream.  */
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI

/* Paste a lowercase mode suffix onto _UP to reach its T_* enumerator.  */
#define UP(X) X##_UP
17774 NEON_LOADSTRUCTLANE
,
17776 NEON_STORESTRUCTLANE
,
17785 const neon_itype itype
;
17786 const neon_builtin_type_mode mode
;
17787 const enum insn_code code
;
17788 unsigned int fcode
;
17789 } neon_builtin_datum
;
/* Insn-code lookup for a neon builtin variant.  */
#define CF(N,X) CODE_FOR_neon_##N##X

/* VARn expands to n neon_builtin_datum initializers for builtin N of
   itype T over modes A..; each VARn is defined in terms of VARn-1.  */
#define VAR1(T, N, A) \
  {#N, NEON_##T, UP (A), CF (N, A), 0}
/* NOTE(review): VAR2's continuation line "VAR1 (T, N, A), \" restored --
   it was elided in the mangled original (gap at orig line 17796); without
   it VAR2 would expand to a single initializer, breaking the VARn chain
   evident from VAR3..VAR10 below.  */
#define VAR2(T, N, A, B) \
  VAR1 (T, N, A), \
  {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
  VAR2 (T, N, A, B), \
  {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
  VAR3 (T, N, A, B, C), \
  {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
  VAR4 (T, N, A, B, C, D), \
  {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
  VAR5 (T, N, A, B, C, D, E), \
  {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
  VAR6 (T, N, A, B, C, D, E, F), \
  {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, A, B, C, D, E, F, G), \
  {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, A, B, C, D, E, F, G, H), \
  {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
  {#N, NEON_##T, UP (J), CF (N, J), 0}
17823 /* The mode entries in the following table correspond to the "key" type of the
17824 instruction variant, i.e. equivalent to that which would be specified after
17825 the assembler mnemonic, which usually refers to the last vector operand.
17826 (Signed/unsigned/polynomial types are not differentiated between though, and
17827 are all mapped onto the same mode for a given element size.) The modes
17828 listed per instruction should be the same as those defined for that
17829 instruction's pattern in neon.md. */
17831 static neon_builtin_datum neon_builtin_data
[] =
17833 VAR10 (BINOP
, vadd
,
17834 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17835 VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
),
17836 VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
),
17837 VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17838 VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17839 VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
),
17840 VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17841 VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17842 VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
),
17843 VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17844 VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
),
17845 VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
),
17846 VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
),
17847 VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
),
17848 VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
),
17849 VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
),
17850 VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
),
17851 VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
),
17852 VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
),
17853 VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
),
17854 VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
),
17855 VAR2 (BINOP
, vqdmull
, v4hi
, v2si
),
17856 VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17857 VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17858 VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17859 VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
),
17860 VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
),
17861 VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
),
17862 VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17863 VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17864 VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17865 VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
),
17866 VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17867 VAR10 (BINOP
, vsub
,
17868 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17869 VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
),
17870 VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
),
17871 VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17872 VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17873 VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
),
17874 VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17875 VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17876 VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17877 VAR2 (BINOP
, vcage
, v2sf
, v4sf
),
17878 VAR2 (BINOP
, vcagt
, v2sf
, v4sf
),
17879 VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17880 VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17881 VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
),
17882 VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17883 VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
),
17884 VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17885 VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17886 VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
),
17887 VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17888 VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17889 VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
),
17890 VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
),
17891 VAR2 (BINOP
, vrecps
, v2sf
, v4sf
),
17892 VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
),
17893 VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17894 VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
17895 VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17896 VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17897 VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17898 VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17899 VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17900 VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17901 VAR2 (UNOP
, vcnt
, v8qi
, v16qi
),
17902 VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
),
17903 VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
),
17904 VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
17905 /* FIXME: vget_lane supports more variants than this! */
17906 VAR10 (GETLANE
, vget_lane
,
17907 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17908 VAR10 (SETLANE
, vset_lane
,
17909 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17910 VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17911 VAR10 (DUP
, vdup_n
,
17912 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17913 VAR10 (DUPLANE
, vdup_lane
,
17914 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17915 VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17916 VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17917 VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17918 VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
),
17919 VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
),
17920 VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
),
17921 VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
),
17922 VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17923 VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17924 VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
),
17925 VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
),
17926 VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17927 VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
),
17928 VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
),
17929 VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17930 VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17931 VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
),
17932 VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
),
17933 VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17934 VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
),
17935 VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
),
17936 VAR10 (BINOP
, vext
,
17937 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17938 VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17939 VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
),
17940 VAR2 (UNOP
, vrev16
, v8qi
, v16qi
),
17941 VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
),
17942 VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
),
17943 VAR10 (SELECT
, vbsl
,
17944 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17945 VAR1 (VTBL
, vtbl1
, v8qi
),
17946 VAR1 (VTBL
, vtbl2
, v8qi
),
17947 VAR1 (VTBL
, vtbl3
, v8qi
),
17948 VAR1 (VTBL
, vtbl4
, v8qi
),
17949 VAR1 (VTBX
, vtbx1
, v8qi
),
17950 VAR1 (VTBX
, vtbx2
, v8qi
),
17951 VAR1 (VTBX
, vtbx3
, v8qi
),
17952 VAR1 (VTBX
, vtbx4
, v8qi
),
17953 VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17954 VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17955 VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
17956 VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17957 VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17958 VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17959 VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17960 VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17961 VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17962 VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17963 VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17964 VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17965 VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17966 VAR10 (LOAD1
, vld1
,
17967 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17968 VAR10 (LOAD1LANE
, vld1_lane
,
17969 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17970 VAR10 (LOAD1
, vld1_dup
,
17971 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17972 VAR10 (STORE1
, vst1
,
17973 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17974 VAR10 (STORE1LANE
, vst1_lane
,
17975 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
17977 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
17978 VAR7 (LOADSTRUCTLANE
, vld2_lane
,
17979 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17980 VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17981 VAR9 (STORESTRUCT
, vst2
,
17982 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
17983 VAR7 (STORESTRUCTLANE
, vst2_lane
,
17984 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17986 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
17987 VAR7 (LOADSTRUCTLANE
, vld3_lane
,
17988 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17989 VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17990 VAR9 (STORESTRUCT
, vst3
,
17991 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
17992 VAR7 (STORESTRUCTLANE
, vst3_lane
,
17993 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17994 VAR9 (LOADSTRUCT
, vld4
,
17995 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
17996 VAR7 (LOADSTRUCTLANE
, vld4_lane
,
17997 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
17998 VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
17999 VAR9 (STORESTRUCT
, vst4
,
18000 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18001 VAR7 (STORESTRUCTLANE
, vst4_lane
,
18002 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18003 VAR10 (LOGICBINOP
, vand
,
18004 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18005 VAR10 (LOGICBINOP
, vorr
,
18006 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18007 VAR10 (BINOP
, veor
,
18008 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18009 VAR10 (LOGICBINOP
, vbic
,
18010 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18011 VAR10 (LOGICBINOP
, vorn
,
18012 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
)
18027 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18028 symbolic names defined here (which would require too much duplication).
18032 ARM_BUILTIN_GETWCX
,
18033 ARM_BUILTIN_SETWCX
,
18037 ARM_BUILTIN_WAVG2BR
,
18038 ARM_BUILTIN_WAVG2HR
,
18039 ARM_BUILTIN_WAVG2B
,
18040 ARM_BUILTIN_WAVG2H
,
18047 ARM_BUILTIN_WMACSZ
,
18049 ARM_BUILTIN_WMACUZ
,
18052 ARM_BUILTIN_WSADBZ
,
18054 ARM_BUILTIN_WSADHZ
,
18056 ARM_BUILTIN_WALIGN
,
18059 ARM_BUILTIN_TMIAPH
,
18060 ARM_BUILTIN_TMIABB
,
18061 ARM_BUILTIN_TMIABT
,
18062 ARM_BUILTIN_TMIATB
,
18063 ARM_BUILTIN_TMIATT
,
18065 ARM_BUILTIN_TMOVMSKB
,
18066 ARM_BUILTIN_TMOVMSKH
,
18067 ARM_BUILTIN_TMOVMSKW
,
18069 ARM_BUILTIN_TBCSTB
,
18070 ARM_BUILTIN_TBCSTH
,
18071 ARM_BUILTIN_TBCSTW
,
18073 ARM_BUILTIN_WMADDS
,
18074 ARM_BUILTIN_WMADDU
,
18076 ARM_BUILTIN_WPACKHSS
,
18077 ARM_BUILTIN_WPACKWSS
,
18078 ARM_BUILTIN_WPACKDSS
,
18079 ARM_BUILTIN_WPACKHUS
,
18080 ARM_BUILTIN_WPACKWUS
,
18081 ARM_BUILTIN_WPACKDUS
,
18086 ARM_BUILTIN_WADDSSB
,
18087 ARM_BUILTIN_WADDSSH
,
18088 ARM_BUILTIN_WADDSSW
,
18089 ARM_BUILTIN_WADDUSB
,
18090 ARM_BUILTIN_WADDUSH
,
18091 ARM_BUILTIN_WADDUSW
,
18095 ARM_BUILTIN_WSUBSSB
,
18096 ARM_BUILTIN_WSUBSSH
,
18097 ARM_BUILTIN_WSUBSSW
,
18098 ARM_BUILTIN_WSUBUSB
,
18099 ARM_BUILTIN_WSUBUSH
,
18100 ARM_BUILTIN_WSUBUSW
,
18107 ARM_BUILTIN_WCMPEQB
,
18108 ARM_BUILTIN_WCMPEQH
,
18109 ARM_BUILTIN_WCMPEQW
,
18110 ARM_BUILTIN_WCMPGTUB
,
18111 ARM_BUILTIN_WCMPGTUH
,
18112 ARM_BUILTIN_WCMPGTUW
,
18113 ARM_BUILTIN_WCMPGTSB
,
18114 ARM_BUILTIN_WCMPGTSH
,
18115 ARM_BUILTIN_WCMPGTSW
,
18117 ARM_BUILTIN_TEXTRMSB
,
18118 ARM_BUILTIN_TEXTRMSH
,
18119 ARM_BUILTIN_TEXTRMSW
,
18120 ARM_BUILTIN_TEXTRMUB
,
18121 ARM_BUILTIN_TEXTRMUH
,
18122 ARM_BUILTIN_TEXTRMUW
,
18123 ARM_BUILTIN_TINSRB
,
18124 ARM_BUILTIN_TINSRH
,
18125 ARM_BUILTIN_TINSRW
,
18127 ARM_BUILTIN_WMAXSW
,
18128 ARM_BUILTIN_WMAXSH
,
18129 ARM_BUILTIN_WMAXSB
,
18130 ARM_BUILTIN_WMAXUW
,
18131 ARM_BUILTIN_WMAXUH
,
18132 ARM_BUILTIN_WMAXUB
,
18133 ARM_BUILTIN_WMINSW
,
18134 ARM_BUILTIN_WMINSH
,
18135 ARM_BUILTIN_WMINSB
,
18136 ARM_BUILTIN_WMINUW
,
18137 ARM_BUILTIN_WMINUH
,
18138 ARM_BUILTIN_WMINUB
,
18140 ARM_BUILTIN_WMULUM
,
18141 ARM_BUILTIN_WMULSM
,
18142 ARM_BUILTIN_WMULUL
,
18144 ARM_BUILTIN_PSADBH
,
18145 ARM_BUILTIN_WSHUFH
,
18159 ARM_BUILTIN_WSLLHI
,
18160 ARM_BUILTIN_WSLLWI
,
18161 ARM_BUILTIN_WSLLDI
,
18162 ARM_BUILTIN_WSRAHI
,
18163 ARM_BUILTIN_WSRAWI
,
18164 ARM_BUILTIN_WSRADI
,
18165 ARM_BUILTIN_WSRLHI
,
18166 ARM_BUILTIN_WSRLWI
,
18167 ARM_BUILTIN_WSRLDI
,
18168 ARM_BUILTIN_WRORHI
,
18169 ARM_BUILTIN_WRORWI
,
18170 ARM_BUILTIN_WRORDI
,
18172 ARM_BUILTIN_WUNPCKIHB
,
18173 ARM_BUILTIN_WUNPCKIHH
,
18174 ARM_BUILTIN_WUNPCKIHW
,
18175 ARM_BUILTIN_WUNPCKILB
,
18176 ARM_BUILTIN_WUNPCKILH
,
18177 ARM_BUILTIN_WUNPCKILW
,
18179 ARM_BUILTIN_WUNPCKEHSB
,
18180 ARM_BUILTIN_WUNPCKEHSH
,
18181 ARM_BUILTIN_WUNPCKEHSW
,
18182 ARM_BUILTIN_WUNPCKEHUB
,
18183 ARM_BUILTIN_WUNPCKEHUH
,
18184 ARM_BUILTIN_WUNPCKEHUW
,
18185 ARM_BUILTIN_WUNPCKELSB
,
18186 ARM_BUILTIN_WUNPCKELSH
,
18187 ARM_BUILTIN_WUNPCKELSW
,
18188 ARM_BUILTIN_WUNPCKELUB
,
18189 ARM_BUILTIN_WUNPCKELUH
,
18190 ARM_BUILTIN_WUNPCKELUW
,
18192 ARM_BUILTIN_THREAD_POINTER
,
18194 ARM_BUILTIN_NEON_BASE
,
18196 ARM_BUILTIN_MAX
= ARM_BUILTIN_NEON_BASE
+ ARRAY_SIZE (neon_builtin_data
)
18199 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
18202 arm_init_neon_builtins (void)
18204 unsigned int i
, fcode
;
18207 tree neon_intQI_type_node
;
18208 tree neon_intHI_type_node
;
18209 tree neon_polyQI_type_node
;
18210 tree neon_polyHI_type_node
;
18211 tree neon_intSI_type_node
;
18212 tree neon_intDI_type_node
;
18213 tree neon_float_type_node
;
18215 tree intQI_pointer_node
;
18216 tree intHI_pointer_node
;
18217 tree intSI_pointer_node
;
18218 tree intDI_pointer_node
;
18219 tree float_pointer_node
;
18221 tree const_intQI_node
;
18222 tree const_intHI_node
;
18223 tree const_intSI_node
;
18224 tree const_intDI_node
;
18225 tree const_float_node
;
18227 tree const_intQI_pointer_node
;
18228 tree const_intHI_pointer_node
;
18229 tree const_intSI_pointer_node
;
18230 tree const_intDI_pointer_node
;
18231 tree const_float_pointer_node
;
18233 tree V8QI_type_node
;
18234 tree V4HI_type_node
;
18235 tree V2SI_type_node
;
18236 tree V2SF_type_node
;
18237 tree V16QI_type_node
;
18238 tree V8HI_type_node
;
18239 tree V4SI_type_node
;
18240 tree V4SF_type_node
;
18241 tree V2DI_type_node
;
18243 tree intUQI_type_node
;
18244 tree intUHI_type_node
;
18245 tree intUSI_type_node
;
18246 tree intUDI_type_node
;
18248 tree intEI_type_node
;
18249 tree intOI_type_node
;
18250 tree intCI_type_node
;
18251 tree intXI_type_node
;
18253 tree V8QI_pointer_node
;
18254 tree V4HI_pointer_node
;
18255 tree V2SI_pointer_node
;
18256 tree V2SF_pointer_node
;
18257 tree V16QI_pointer_node
;
18258 tree V8HI_pointer_node
;
18259 tree V4SI_pointer_node
;
18260 tree V4SF_pointer_node
;
18261 tree V2DI_pointer_node
;
18263 tree void_ftype_pv8qi_v8qi_v8qi
;
18264 tree void_ftype_pv4hi_v4hi_v4hi
;
18265 tree void_ftype_pv2si_v2si_v2si
;
18266 tree void_ftype_pv2sf_v2sf_v2sf
;
18267 tree void_ftype_pdi_di_di
;
18268 tree void_ftype_pv16qi_v16qi_v16qi
;
18269 tree void_ftype_pv8hi_v8hi_v8hi
;
18270 tree void_ftype_pv4si_v4si_v4si
;
18271 tree void_ftype_pv4sf_v4sf_v4sf
;
18272 tree void_ftype_pv2di_v2di_v2di
;
18274 tree reinterp_ftype_dreg
[5][5];
18275 tree reinterp_ftype_qreg
[5][5];
18276 tree dreg_types
[5], qreg_types
[5];
18278 /* Create distinguished type nodes for NEON vector element types,
18279 and pointers to values of such types, so we can detect them later. */
18280 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18281 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18282 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18283 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18284 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
18285 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
18286 neon_float_type_node
= make_node (REAL_TYPE
);
18287 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
18288 layout_type (neon_float_type_node
);
18290 /* Define typedefs which exactly correspond to the modes we are basing vector
18291 types on. If you change these names you'll need to change
18292 the table used by arm_mangle_type too. */
18293 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
18294 "__builtin_neon_qi");
18295 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
18296 "__builtin_neon_hi");
18297 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
18298 "__builtin_neon_si");
18299 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
18300 "__builtin_neon_sf");
18301 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
18302 "__builtin_neon_di");
18303 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
18304 "__builtin_neon_poly8");
18305 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
18306 "__builtin_neon_poly16");
18308 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
18309 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
18310 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
18311 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
18312 float_pointer_node
= build_pointer_type (neon_float_type_node
);
18314 /* Next create constant-qualified versions of the above types. */
18315 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
18317 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
18319 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
18321 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
18323 const_float_node
= build_qualified_type (neon_float_type_node
,
18326 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
18327 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
18328 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
18329 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
18330 const_float_pointer_node
= build_pointer_type (const_float_node
);
18332 /* Now create vector types based on our NEON element types. */
18333 /* 64-bit vectors. */
18335 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
18337 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
18339 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
18341 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
18342 /* 128-bit vectors. */
18344 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
18346 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
18348 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
18350 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
18352 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
18354 /* Unsigned integer types for various mode sizes. */
18355 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
18356 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
18357 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
18358 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
18360 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
18361 "__builtin_neon_uqi");
18362 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
18363 "__builtin_neon_uhi");
18364 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
18365 "__builtin_neon_usi");
18366 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
18367 "__builtin_neon_udi");
18369 /* Opaque integer types for structures of vectors. */
18370 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
18371 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
18372 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
18373 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
18375 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
18376 "__builtin_neon_ti");
18377 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
18378 "__builtin_neon_ei");
18379 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
18380 "__builtin_neon_oi");
18381 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
18382 "__builtin_neon_ci");
18383 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
18384 "__builtin_neon_xi");
18386 /* Pointers to vector types. */
18387 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
18388 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
18389 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
18390 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
18391 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
18392 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
18393 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
18394 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
18395 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
18397 /* Operations which return results as pairs. */
18398 void_ftype_pv8qi_v8qi_v8qi
=
18399 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
18400 V8QI_type_node
, NULL
);
18401 void_ftype_pv4hi_v4hi_v4hi
=
18402 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
18403 V4HI_type_node
, NULL
);
18404 void_ftype_pv2si_v2si_v2si
=
18405 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
18406 V2SI_type_node
, NULL
);
18407 void_ftype_pv2sf_v2sf_v2sf
=
18408 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
18409 V2SF_type_node
, NULL
);
18410 void_ftype_pdi_di_di
=
18411 build_function_type_list (void_type_node
, intDI_pointer_node
,
18412 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
18413 void_ftype_pv16qi_v16qi_v16qi
=
18414 build_function_type_list (void_type_node
, V16QI_pointer_node
,
18415 V16QI_type_node
, V16QI_type_node
, NULL
);
18416 void_ftype_pv8hi_v8hi_v8hi
=
18417 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
18418 V8HI_type_node
, NULL
);
18419 void_ftype_pv4si_v4si_v4si
=
18420 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
18421 V4SI_type_node
, NULL
);
18422 void_ftype_pv4sf_v4sf_v4sf
=
18423 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
18424 V4SF_type_node
, NULL
);
18425 void_ftype_pv2di_v2di_v2di
=
18426 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
18427 V2DI_type_node
, NULL
);
18429 dreg_types
[0] = V8QI_type_node
;
18430 dreg_types
[1] = V4HI_type_node
;
18431 dreg_types
[2] = V2SI_type_node
;
18432 dreg_types
[3] = V2SF_type_node
;
18433 dreg_types
[4] = neon_intDI_type_node
;
18435 qreg_types
[0] = V16QI_type_node
;
18436 qreg_types
[1] = V8HI_type_node
;
18437 qreg_types
[2] = V4SI_type_node
;
18438 qreg_types
[3] = V4SF_type_node
;
18439 qreg_types
[4] = V2DI_type_node
;
18441 for (i
= 0; i
< 5; i
++)
18444 for (j
= 0; j
< 5; j
++)
18446 reinterp_ftype_dreg
[i
][j
]
18447 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
18448 reinterp_ftype_qreg
[i
][j
]
18449 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
18453 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
18454 i
< ARRAY_SIZE (neon_builtin_data
);
18457 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
18459 const char* const modenames
[] = {
18460 "v8qi", "v4hi", "v2si", "v2sf", "di",
18461 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18466 int is_load
= 0, is_store
= 0;
18468 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
18475 case NEON_LOAD1LANE
:
18476 case NEON_LOADSTRUCT
:
18477 case NEON_LOADSTRUCTLANE
:
18479 /* Fall through. */
18481 case NEON_STORE1LANE
:
18482 case NEON_STORESTRUCT
:
18483 case NEON_STORESTRUCTLANE
:
18486 /* Fall through. */
18489 case NEON_LOGICBINOP
:
18490 case NEON_SHIFTINSERT
:
18497 case NEON_SHIFTIMM
:
18498 case NEON_SHIFTACC
:
18504 case NEON_LANEMULL
:
18505 case NEON_LANEMULH
:
18507 case NEON_SCALARMUL
:
18508 case NEON_SCALARMULL
:
18509 case NEON_SCALARMULH
:
18510 case NEON_SCALARMAC
:
18516 tree return_type
= void_type_node
, args
= void_list_node
;
18518 /* Build a function type directly from the insn_data for
18519 this builtin. The build_function_type() function takes
18520 care of removing duplicates for us. */
18521 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
18525 if (is_load
&& k
== 1)
18527 /* Neon load patterns always have the memory
18528 operand in the operand 1 position. */
18529 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
18530 == neon_struct_operand
);
18536 eltype
= const_intQI_pointer_node
;
18541 eltype
= const_intHI_pointer_node
;
18546 eltype
= const_intSI_pointer_node
;
18551 eltype
= const_float_pointer_node
;
18556 eltype
= const_intDI_pointer_node
;
18559 default: gcc_unreachable ();
18562 else if (is_store
&& k
== 0)
18564 /* Similarly, Neon store patterns use operand 0 as
18565 the memory location to store to. */
18566 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
18567 == neon_struct_operand
);
18573 eltype
= intQI_pointer_node
;
18578 eltype
= intHI_pointer_node
;
18583 eltype
= intSI_pointer_node
;
18588 eltype
= float_pointer_node
;
18593 eltype
= intDI_pointer_node
;
18596 default: gcc_unreachable ();
18601 switch (insn_data
[d
->code
].operand
[k
].mode
)
18603 case VOIDmode
: eltype
= void_type_node
; break;
18605 case QImode
: eltype
= neon_intQI_type_node
; break;
18606 case HImode
: eltype
= neon_intHI_type_node
; break;
18607 case SImode
: eltype
= neon_intSI_type_node
; break;
18608 case SFmode
: eltype
= neon_float_type_node
; break;
18609 case DImode
: eltype
= neon_intDI_type_node
; break;
18610 case TImode
: eltype
= intTI_type_node
; break;
18611 case EImode
: eltype
= intEI_type_node
; break;
18612 case OImode
: eltype
= intOI_type_node
; break;
18613 case CImode
: eltype
= intCI_type_node
; break;
18614 case XImode
: eltype
= intXI_type_node
; break;
18615 /* 64-bit vectors. */
18616 case V8QImode
: eltype
= V8QI_type_node
; break;
18617 case V4HImode
: eltype
= V4HI_type_node
; break;
18618 case V2SImode
: eltype
= V2SI_type_node
; break;
18619 case V2SFmode
: eltype
= V2SF_type_node
; break;
18620 /* 128-bit vectors. */
18621 case V16QImode
: eltype
= V16QI_type_node
; break;
18622 case V8HImode
: eltype
= V8HI_type_node
; break;
18623 case V4SImode
: eltype
= V4SI_type_node
; break;
18624 case V4SFmode
: eltype
= V4SF_type_node
; break;
18625 case V2DImode
: eltype
= V2DI_type_node
; break;
18626 default: gcc_unreachable ();
18630 if (k
== 0 && !is_store
)
18631 return_type
= eltype
;
18633 args
= tree_cons (NULL_TREE
, eltype
, args
);
18636 ftype
= build_function_type (return_type
, args
);
18640 case NEON_RESULTPAIR
:
18642 switch (insn_data
[d
->code
].operand
[1].mode
)
18644 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
18645 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
18646 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
18647 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
18648 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
18649 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
18650 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
18651 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
18652 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
18653 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
18654 default: gcc_unreachable ();
18659 case NEON_REINTERP
:
18661 /* We iterate over 5 doubleword types, then 5 quadword
18663 int rhs
= d
->mode
% 5;
18664 switch (insn_data
[d
->code
].operand
[0].mode
)
18666 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
18667 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
18668 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
18669 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
18670 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
18671 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
18672 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
18673 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
18674 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
18675 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
18676 default: gcc_unreachable ();
18682 gcc_unreachable ();
18685 gcc_assert (ftype
!= NULL
);
18687 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
18689 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
18691 arm_builtin_decls
[fcode
] = decl
;
/* Register the iWMMXt builtin NAME with function type TYPE and builtin
   enum value CODE, but only when the MASK bits are set in the target's
   insn_flags.  The created decl is recorded in arm_builtin_decls so it
   can later be returned by arm_builtin_decl.
   Wrapped in do { } while (0) so the expansion is a single statement
   (safe inside unbraced if/else), and BDECL is declared locally so the
   macro is self-contained.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	{								\
	  tree bdecl;							\
	  bdecl = add_builtin_function ((NAME), (TYPE), (CODE),		\
					BUILT_IN_MD, NULL, NULL_TREE);	\
	  arm_builtin_decls[CODE] = bdecl;				\
	}								\
    }									\
  while (0)
18708 struct builtin_description
18710 const unsigned int mask
;
18711 const enum insn_code icode
;
18712 const char * const name
;
18713 const enum arm_builtins code
;
18714 const enum rtx_code comparison
;
18715 const unsigned int flag
;
18718 static const struct builtin_description bdesc_2arg
[] =
18720 #define IWMMXT_BUILTIN(code, string, builtin) \
18721 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
18722 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18724 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
18725 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
18726 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
18727 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
18728 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
18729 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
18730 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
18731 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
18732 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
18733 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
18734 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
18735 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
18736 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
18737 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
18738 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
18739 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
18740 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
18741 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
18742 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
18743 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
18744 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
18745 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
18746 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
18747 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
18748 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
18749 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
18750 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
18751 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
18752 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
18753 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
18754 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
18755 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
18756 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
18757 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
18758 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
18759 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
18760 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
18761 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
18762 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
18763 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
18764 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
18765 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
18766 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
18767 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
18768 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
18769 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
18770 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
18771 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
18772 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
18773 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
18774 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
18775 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
18776 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
18777 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
18778 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
18779 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
18780 IWMMXT_BUILTIN (iwmmxt_wmadds
, "wmadds", WMADDS
)
18781 IWMMXT_BUILTIN (iwmmxt_wmaddu
, "wmaddu", WMADDU
)
18783 #define IWMMXT_BUILTIN2(code, builtin) \
18784 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
18786 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
18787 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
18788 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
18789 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
18790 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
18791 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
18792 IWMMXT_BUILTIN2 (ashlv4hi3_di
, WSLLH
)
18793 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt
, WSLLHI
)
18794 IWMMXT_BUILTIN2 (ashlv2si3_di
, WSLLW
)
18795 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt
, WSLLWI
)
18796 IWMMXT_BUILTIN2 (ashldi3_di
, WSLLD
)
18797 IWMMXT_BUILTIN2 (ashldi3_iwmmxt
, WSLLDI
)
18798 IWMMXT_BUILTIN2 (lshrv4hi3_di
, WSRLH
)
18799 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt
, WSRLHI
)
18800 IWMMXT_BUILTIN2 (lshrv2si3_di
, WSRLW
)
18801 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt
, WSRLWI
)
18802 IWMMXT_BUILTIN2 (lshrdi3_di
, WSRLD
)
18803 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt
, WSRLDI
)
18804 IWMMXT_BUILTIN2 (ashrv4hi3_di
, WSRAH
)
18805 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt
, WSRAHI
)
18806 IWMMXT_BUILTIN2 (ashrv2si3_di
, WSRAW
)
18807 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt
, WSRAWI
)
18808 IWMMXT_BUILTIN2 (ashrdi3_di
, WSRAD
)
18809 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt
, WSRADI
)
18810 IWMMXT_BUILTIN2 (rorv4hi3_di
, WRORH
)
18811 IWMMXT_BUILTIN2 (rorv4hi3
, WRORHI
)
18812 IWMMXT_BUILTIN2 (rorv2si3_di
, WRORW
)
18813 IWMMXT_BUILTIN2 (rorv2si3
, WRORWI
)
18814 IWMMXT_BUILTIN2 (rordi3_di
, WRORD
)
18815 IWMMXT_BUILTIN2 (rordi3
, WRORDI
)
18816 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
18817 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
18820 static const struct builtin_description bdesc_1arg
[] =
18822 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
18823 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
18824 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
18825 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
18826 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
18827 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
18828 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
18829 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
18830 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
18831 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
18832 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
18833 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
18834 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
18835 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
18836 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
18837 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
18838 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
18839 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
18842 /* Set up all the iWMMXt builtins. This is not called if
18843 TARGET_IWMMXT is zero. */
18846 arm_init_iwmmxt_builtins (void)
18848 const struct builtin_description
* d
;
18851 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
18852 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
18853 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
18856 = build_function_type_list (integer_type_node
,
18857 integer_type_node
, NULL_TREE
);
18858 tree v8qi_ftype_v8qi_v8qi_int
18859 = build_function_type_list (V8QI_type_node
,
18860 V8QI_type_node
, V8QI_type_node
,
18861 integer_type_node
, NULL_TREE
);
18862 tree v4hi_ftype_v4hi_int
18863 = build_function_type_list (V4HI_type_node
,
18864 V4HI_type_node
, integer_type_node
, NULL_TREE
);
18865 tree v2si_ftype_v2si_int
18866 = build_function_type_list (V2SI_type_node
,
18867 V2SI_type_node
, integer_type_node
, NULL_TREE
);
18868 tree v2si_ftype_di_di
18869 = build_function_type_list (V2SI_type_node
,
18870 long_long_integer_type_node
,
18871 long_long_integer_type_node
,
18873 tree di_ftype_di_int
18874 = build_function_type_list (long_long_integer_type_node
,
18875 long_long_integer_type_node
,
18876 integer_type_node
, NULL_TREE
);
18877 tree di_ftype_di_int_int
18878 = build_function_type_list (long_long_integer_type_node
,
18879 long_long_integer_type_node
,
18881 integer_type_node
, NULL_TREE
);
18882 tree int_ftype_v8qi
18883 = build_function_type_list (integer_type_node
,
18884 V8QI_type_node
, NULL_TREE
);
18885 tree int_ftype_v4hi
18886 = build_function_type_list (integer_type_node
,
18887 V4HI_type_node
, NULL_TREE
);
18888 tree int_ftype_v2si
18889 = build_function_type_list (integer_type_node
,
18890 V2SI_type_node
, NULL_TREE
);
18891 tree int_ftype_v8qi_int
18892 = build_function_type_list (integer_type_node
,
18893 V8QI_type_node
, integer_type_node
, NULL_TREE
);
18894 tree int_ftype_v4hi_int
18895 = build_function_type_list (integer_type_node
,
18896 V4HI_type_node
, integer_type_node
, NULL_TREE
);
18897 tree int_ftype_v2si_int
18898 = build_function_type_list (integer_type_node
,
18899 V2SI_type_node
, integer_type_node
, NULL_TREE
);
18900 tree v8qi_ftype_v8qi_int_int
18901 = build_function_type_list (V8QI_type_node
,
18902 V8QI_type_node
, integer_type_node
,
18903 integer_type_node
, NULL_TREE
);
18904 tree v4hi_ftype_v4hi_int_int
18905 = build_function_type_list (V4HI_type_node
,
18906 V4HI_type_node
, integer_type_node
,
18907 integer_type_node
, NULL_TREE
);
18908 tree v2si_ftype_v2si_int_int
18909 = build_function_type_list (V2SI_type_node
,
18910 V2SI_type_node
, integer_type_node
,
18911 integer_type_node
, NULL_TREE
);
18912 /* Miscellaneous. */
18913 tree v8qi_ftype_v4hi_v4hi
18914 = build_function_type_list (V8QI_type_node
,
18915 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18916 tree v4hi_ftype_v2si_v2si
18917 = build_function_type_list (V4HI_type_node
,
18918 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18919 tree v2si_ftype_v4hi_v4hi
18920 = build_function_type_list (V2SI_type_node
,
18921 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
18922 tree v2si_ftype_v8qi_v8qi
18923 = build_function_type_list (V2SI_type_node
,
18924 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18925 tree v4hi_ftype_v4hi_di
18926 = build_function_type_list (V4HI_type_node
,
18927 V4HI_type_node
, long_long_integer_type_node
,
18929 tree v2si_ftype_v2si_di
18930 = build_function_type_list (V2SI_type_node
,
18931 V2SI_type_node
, long_long_integer_type_node
,
18933 tree void_ftype_int_int
18934 = build_function_type_list (void_type_node
,
18935 integer_type_node
, integer_type_node
,
18938 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
18940 = build_function_type_list (long_long_integer_type_node
,
18941 V8QI_type_node
, NULL_TREE
);
18943 = build_function_type_list (long_long_integer_type_node
,
18944 V4HI_type_node
, NULL_TREE
);
18946 = build_function_type_list (long_long_integer_type_node
,
18947 V2SI_type_node
, NULL_TREE
);
18948 tree v2si_ftype_v4hi
18949 = build_function_type_list (V2SI_type_node
,
18950 V4HI_type_node
, NULL_TREE
);
18951 tree v4hi_ftype_v8qi
18952 = build_function_type_list (V4HI_type_node
,
18953 V8QI_type_node
, NULL_TREE
);
18955 tree di_ftype_di_v4hi_v4hi
18956 = build_function_type_list (long_long_unsigned_type_node
,
18957 long_long_unsigned_type_node
,
18958 V4HI_type_node
, V4HI_type_node
,
18961 tree di_ftype_v4hi_v4hi
18962 = build_function_type_list (long_long_unsigned_type_node
,
18963 V4HI_type_node
,V4HI_type_node
,
18966 /* Normal vector binops. */
18967 tree v8qi_ftype_v8qi_v8qi
18968 = build_function_type_list (V8QI_type_node
,
18969 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
18970 tree v4hi_ftype_v4hi_v4hi
18971 = build_function_type_list (V4HI_type_node
,
18972 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
18973 tree v2si_ftype_v2si_v2si
18974 = build_function_type_list (V2SI_type_node
,
18975 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
18976 tree di_ftype_di_di
18977 = build_function_type_list (long_long_unsigned_type_node
,
18978 long_long_unsigned_type_node
,
18979 long_long_unsigned_type_node
,
18982 /* Add all builtins that are more or less simple operations on two
18984 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18986 /* Use one of the operands; the target can have a different mode for
18987 mask-generating compares. */
18988 enum machine_mode mode
;
18994 mode
= insn_data
[d
->icode
].operand
[1].mode
;
18999 type
= v8qi_ftype_v8qi_v8qi
;
19002 type
= v4hi_ftype_v4hi_v4hi
;
19005 type
= v2si_ftype_v2si_v2si
;
19008 type
= di_ftype_di_di
;
19012 gcc_unreachable ();
19015 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
19018 /* Add the remaining MMX insns with somewhat more complicated types. */
19019 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19021 ARM_BUILTIN_ ## CODE)
19023 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
19024 iwmmx_mbuiltin ("setwcx", void_ftype_int_int
, SETWCX
);
19025 iwmmx_mbuiltin ("getwcx", int_ftype_int
, GETWCX
);
19027 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
19028 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
19029 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
19030 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
19031 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
19032 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
19034 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
19035 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
19036 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
19037 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
19038 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
19039 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
19041 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
19042 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
19043 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
19044 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
19045 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
19046 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
19048 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
19049 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
19050 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
19051 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
19052 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
19053 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
19055 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
19057 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi
, WSADB
);
19058 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi
, WSADH
);
19059 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
19060 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
19062 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
19063 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
19064 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
19065 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
19066 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
19067 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
19068 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
19069 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
19070 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
19072 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
19073 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
19074 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
19076 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
19077 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
19078 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
19080 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
19081 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
19082 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
19083 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
19084 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
19085 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
19087 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
19088 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
19089 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
19090 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
19091 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
19092 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
19093 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
19094 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
19095 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
19096 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
19097 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
19098 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
19100 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
19101 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
19102 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
19103 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
19105 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGN
);
19106 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
19107 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
19108 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
19109 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
19110 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
19111 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
19113 #undef iwmmx_mbuiltin
19117 arm_init_tls_builtins (void)
19121 ftype
= build_function_type (ptr_type_node
, void_list_node
);
19122 decl
= add_builtin_function ("__builtin_thread_pointer", ftype
,
19123 ARM_BUILTIN_THREAD_POINTER
, BUILT_IN_MD
,
19125 TREE_NOTHROW (decl
) = 1;
19126 TREE_READONLY (decl
) = 1;
19127 arm_builtin_decls
[ARM_BUILTIN_THREAD_POINTER
] = decl
;
19131 arm_init_fp16_builtins (void)
19133 tree fp16_type
= make_node (REAL_TYPE
);
19134 TYPE_PRECISION (fp16_type
) = 16;
19135 layout_type (fp16_type
);
19136 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
19140 arm_init_builtins (void)
19142 arm_init_tls_builtins ();
19144 if (TARGET_REALLY_IWMMXT
)
19145 arm_init_iwmmxt_builtins ();
19148 arm_init_neon_builtins ();
19150 if (arm_fp16_format
)
19151 arm_init_fp16_builtins ();
19154 /* Return the ARM builtin for CODE. */
19157 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
19159 if (code
>= ARM_BUILTIN_MAX
)
19160 return error_mark_node
;
19162 return arm_builtin_decls
[code
];
19165 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19167 static const char *
19168 arm_invalid_parameter_type (const_tree t
)
19170 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19171 return N_("function parameters cannot have __fp16 type");
19175 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19177 static const char *
19178 arm_invalid_return_type (const_tree t
)
19180 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19181 return N_("functions cannot return __fp16 type");
19185 /* Implement TARGET_PROMOTED_TYPE. */
19188 arm_promoted_type (const_tree t
)
19190 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19191 return float_type_node
;
19195 /* Implement TARGET_CONVERT_TO_TYPE.
19196 Specifically, this hook implements the peculiarity of the ARM
19197 half-precision floating-point C semantics that requires conversions between
19198 __fp16 to or from double to do an intermediate conversion to float. */
19201 arm_convert_to_type (tree type
, tree expr
)
19203 tree fromtype
= TREE_TYPE (expr
);
19204 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
19206 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
19207 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
19208 return convert (type
, convert (float_type_node
, expr
));
19212 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19213 This simply adds HFmode as a supported mode; even though we don't
19214 implement arithmetic on this type directly, it's supported by
19215 optabs conversions, much the way the double-word arithmetic is
19216 special-cased in the default hook. */
19219 arm_scalar_mode_supported_p (enum machine_mode mode
)
19221 if (mode
== HFmode
)
19222 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
19224 return default_scalar_mode_supported_p (mode
);
19227 /* Errors in the source file can cause expand_expr to return const0_rtx
19228 where we expect a vector. To avoid crashing, use one of the vector
19229 clear instructions. */
19232 safe_vector_operand (rtx x
, enum machine_mode mode
)
19234 if (x
!= const0_rtx
)
19236 x
= gen_reg_rtx (mode
);
19238 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
19239 : gen_rtx_SUBREG (DImode
, x
, 0)));
19243 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19246 arm_expand_binop_builtin (enum insn_code icode
,
19247 tree exp
, rtx target
)
19250 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19251 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19252 rtx op0
= expand_normal (arg0
);
19253 rtx op1
= expand_normal (arg1
);
19254 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19255 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19256 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19258 if (VECTOR_MODE_P (mode0
))
19259 op0
= safe_vector_operand (op0
, mode0
);
19260 if (VECTOR_MODE_P (mode1
))
19261 op1
= safe_vector_operand (op1
, mode1
);
19264 || GET_MODE (target
) != tmode
19265 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19266 target
= gen_reg_rtx (tmode
);
19268 gcc_assert (GET_MODE (op0
) == mode0
&& GET_MODE (op1
) == mode1
);
19270 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19271 op0
= copy_to_mode_reg (mode0
, op0
);
19272 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19273 op1
= copy_to_mode_reg (mode1
, op1
);
19275 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19282 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19285 arm_expand_unop_builtin (enum insn_code icode
,
19286 tree exp
, rtx target
, int do_load
)
19289 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19290 rtx op0
= expand_normal (arg0
);
19291 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19292 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19295 || GET_MODE (target
) != tmode
19296 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19297 target
= gen_reg_rtx (tmode
);
19299 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19302 if (VECTOR_MODE_P (mode0
))
19303 op0
= safe_vector_operand (op0
, mode0
);
19305 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19306 op0
= copy_to_mode_reg (mode0
, op0
);
19309 pat
= GEN_FCN (icode
) (target
, op0
);
19317 NEON_ARG_COPY_TO_REG
,
19323 #define NEON_MAX_BUILTIN_ARGS 5
19325 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19326 and return an expression for the accessed memory.
19328 The intrinsic function operates on a block of registers that has
19329 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19330 The function references the memory at EXP in mode MEM_MODE;
19331 this mode may be BLKmode if no more suitable mode is available. */
19334 neon_dereference_pointer (tree exp
, enum machine_mode mem_mode
,
19335 enum machine_mode reg_mode
,
19336 neon_builtin_type_mode type_mode
)
19338 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
19339 tree elem_type
, upper_bound
, array_type
;
19341 /* Work out the size of the register block in bytes. */
19342 reg_size
= GET_MODE_SIZE (reg_mode
);
19344 /* Work out the size of each vector in bytes. */
19345 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
19346 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
19348 /* Work out how many vectors there are. */
19349 gcc_assert (reg_size
% vector_size
== 0);
19350 nvectors
= reg_size
/ vector_size
;
19352 /* Work out how many elements are being loaded or stored.
19353 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19354 and memory elements; anything else implies a lane load or store. */
19355 if (mem_mode
== reg_mode
)
19356 nelems
= vector_size
* nvectors
;
19360 /* Work out the type of each element. */
19361 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp
)));
19362 elem_type
= TREE_TYPE (TREE_TYPE (exp
));
19364 /* Create a type that describes the full access. */
19365 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
19366 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
19368 /* Dereference EXP using that type. */
19369 exp
= convert (build_pointer_type (array_type
), exp
);
19370 return fold_build2 (MEM_REF
, array_type
, exp
,
19371 build_int_cst (TREE_TYPE (exp
), 0));
19374 /* Expand a Neon builtin. */
19376 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
19377 neon_builtin_type_mode type_mode
,
19382 tree arg
[NEON_MAX_BUILTIN_ARGS
];
19383 rtx op
[NEON_MAX_BUILTIN_ARGS
];
19384 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19385 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
19386 enum machine_mode other_mode
;
19392 || GET_MODE (target
) != tmode
19393 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
19394 target
= gen_reg_rtx (tmode
);
19396 va_start (ap
, exp
);
19400 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
19402 if (thisarg
== NEON_ARG_STOP
)
19406 opno
= argc
+ have_retval
;
19407 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
19408 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
19409 if (thisarg
== NEON_ARG_MEMORY
)
19411 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
19412 arg
[argc
] = neon_dereference_pointer (arg
[argc
], mode
[argc
],
19413 other_mode
, type_mode
);
19415 op
[argc
] = expand_normal (arg
[argc
]);
19419 case NEON_ARG_COPY_TO_REG
:
19420 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19421 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19422 (op
[argc
], mode
[argc
]))
19423 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
19426 case NEON_ARG_CONSTANT
:
19427 /* FIXME: This error message is somewhat unhelpful. */
19428 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19429 (op
[argc
], mode
[argc
]))
19430 error ("argument must be a constant");
19433 case NEON_ARG_MEMORY
:
19434 gcc_assert (MEM_P (op
[argc
]));
19435 PUT_MODE (op
[argc
], mode
[argc
]);
19436 /* ??? arm_neon.h uses the same built-in functions for signed
19437 and unsigned accesses, casting where necessary. This isn't
19439 set_mem_alias_set (op
[argc
], 0);
19440 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19441 (op
[argc
], mode
[argc
]))
19442 op
[argc
] = (replace_equiv_address
19443 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
19446 case NEON_ARG_STOP
:
19447 gcc_unreachable ();
19460 pat
= GEN_FCN (icode
) (target
, op
[0]);
19464 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
19468 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
19472 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
19476 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
19480 gcc_unreachable ();
19486 pat
= GEN_FCN (icode
) (op
[0]);
19490 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
19494 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
19498 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
19502 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
19506 gcc_unreachable ();
19517 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19518 constants defined per-instruction or per instruction-variant. Instead, the
19519 required info is looked up in the table neon_builtin_data. */
19521 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
19523 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
19524 neon_itype itype
= d
->itype
;
19525 enum insn_code icode
= d
->code
;
19526 neon_builtin_type_mode type_mode
= d
->mode
;
19533 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19534 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19538 case NEON_SCALARMUL
:
19539 case NEON_SCALARMULL
:
19540 case NEON_SCALARMULH
:
19541 case NEON_SHIFTINSERT
:
19542 case NEON_LOGICBINOP
:
19543 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19544 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19548 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19549 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19550 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19554 case NEON_SHIFTIMM
:
19555 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19556 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
19560 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19561 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19565 case NEON_REINTERP
:
19566 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19567 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19571 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19572 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19574 case NEON_RESULTPAIR
:
19575 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19576 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19580 case NEON_LANEMULL
:
19581 case NEON_LANEMULH
:
19582 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19583 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19584 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19587 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19588 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19589 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19591 case NEON_SHIFTACC
:
19592 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19593 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19594 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19596 case NEON_SCALARMAC
:
19597 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19598 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19599 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19603 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19604 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19608 case NEON_LOADSTRUCT
:
19609 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19610 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
19612 case NEON_LOAD1LANE
:
19613 case NEON_LOADSTRUCTLANE
:
19614 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19615 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19619 case NEON_STORESTRUCT
:
19620 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19621 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19623 case NEON_STORE1LANE
:
19624 case NEON_STORESTRUCTLANE
:
19625 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19626 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19630 gcc_unreachable ();
19633 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19635 neon_reinterpret (rtx dest
, rtx src
)
19637 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
19640 /* Emit code to place a Neon pair result in memory locations (with equal
19643 neon_emit_pair_result_insn (enum machine_mode mode
,
19644 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
19647 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
19648 rtx tmp1
= gen_reg_rtx (mode
);
19649 rtx tmp2
= gen_reg_rtx (mode
);
19651 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
19653 emit_move_insn (mem
, tmp1
);
19654 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
19655 emit_move_insn (mem
, tmp2
);
19658 /* Set up operands for a register copy from src to dest, taking care not to
19659 clobber registers in the process.
19660 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19661 be called with a large N, so that should be OK. */
19664 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
19666 unsigned int copied
= 0, opctr
= 0;
19667 unsigned int done
= (1 << count
) - 1;
19670 while (copied
!= done
)
19672 for (i
= 0; i
< count
; i
++)
19676 for (j
= 0; good
&& j
< count
; j
++)
19677 if (i
!= j
&& (copied
& (1 << j
)) == 0
19678 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
19683 operands
[opctr
++] = dest
[i
];
19684 operands
[opctr
++] = src
[i
];
19690 gcc_assert (opctr
== count
* 2);
19693 /* Expand an expression EXP that calls a built-in function,
19694 with result going to TARGET if that's convenient
19695 (and in mode MODE if that's convenient).
19696 SUBTARGET may be used as the target for computing one of EXP's operands.
19697 IGNORE is nonzero if the value is to be ignored. */
19700 arm_expand_builtin (tree exp
,
19702 rtx subtarget ATTRIBUTE_UNUSED
,
19703 enum machine_mode mode ATTRIBUTE_UNUSED
,
19704 int ignore ATTRIBUTE_UNUSED
)
19706 const struct builtin_description
* d
;
19707 enum insn_code icode
;
19708 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19716 int fcode
= DECL_FUNCTION_CODE (fndecl
);
19718 enum machine_mode tmode
;
19719 enum machine_mode mode0
;
19720 enum machine_mode mode1
;
19721 enum machine_mode mode2
;
19723 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
19724 return arm_expand_neon_builtin (fcode
, exp
, target
);
19728 case ARM_BUILTIN_TEXTRMSB
:
19729 case ARM_BUILTIN_TEXTRMUB
:
19730 case ARM_BUILTIN_TEXTRMSH
:
19731 case ARM_BUILTIN_TEXTRMUH
:
19732 case ARM_BUILTIN_TEXTRMSW
:
19733 case ARM_BUILTIN_TEXTRMUW
:
19734 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
19735 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
19736 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
19737 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
19738 : CODE_FOR_iwmmxt_textrmw
);
19740 arg0
= CALL_EXPR_ARG (exp
, 0);
19741 arg1
= CALL_EXPR_ARG (exp
, 1);
19742 op0
= expand_normal (arg0
);
19743 op1
= expand_normal (arg1
);
19744 tmode
= insn_data
[icode
].operand
[0].mode
;
19745 mode0
= insn_data
[icode
].operand
[1].mode
;
19746 mode1
= insn_data
[icode
].operand
[2].mode
;
19748 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19749 op0
= copy_to_mode_reg (mode0
, op0
);
19750 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19752 /* @@@ better error message */
19753 error ("selector must be an immediate");
19754 return gen_reg_rtx (tmode
);
19757 || GET_MODE (target
) != tmode
19758 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19759 target
= gen_reg_rtx (tmode
);
19760 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19766 case ARM_BUILTIN_TINSRB
:
19767 case ARM_BUILTIN_TINSRH
:
19768 case ARM_BUILTIN_TINSRW
:
19769 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
19770 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
19771 : CODE_FOR_iwmmxt_tinsrw
);
19772 arg0
= CALL_EXPR_ARG (exp
, 0);
19773 arg1
= CALL_EXPR_ARG (exp
, 1);
19774 arg2
= CALL_EXPR_ARG (exp
, 2);
19775 op0
= expand_normal (arg0
);
19776 op1
= expand_normal (arg1
);
19777 op2
= expand_normal (arg2
);
19778 tmode
= insn_data
[icode
].operand
[0].mode
;
19779 mode0
= insn_data
[icode
].operand
[1].mode
;
19780 mode1
= insn_data
[icode
].operand
[2].mode
;
19781 mode2
= insn_data
[icode
].operand
[3].mode
;
19783 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19784 op0
= copy_to_mode_reg (mode0
, op0
);
19785 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19786 op1
= copy_to_mode_reg (mode1
, op1
);
19787 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19789 /* @@@ better error message */
19790 error ("selector must be an immediate");
19794 || GET_MODE (target
) != tmode
19795 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19796 target
= gen_reg_rtx (tmode
);
19797 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19803 case ARM_BUILTIN_SETWCX
:
19804 arg0
= CALL_EXPR_ARG (exp
, 0);
19805 arg1
= CALL_EXPR_ARG (exp
, 1);
19806 op0
= force_reg (SImode
, expand_normal (arg0
));
19807 op1
= expand_normal (arg1
);
19808 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
19811 case ARM_BUILTIN_GETWCX
:
19812 arg0
= CALL_EXPR_ARG (exp
, 0);
19813 op0
= expand_normal (arg0
);
19814 target
= gen_reg_rtx (SImode
);
19815 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
19818 case ARM_BUILTIN_WSHUFH
:
19819 icode
= CODE_FOR_iwmmxt_wshufh
;
19820 arg0
= CALL_EXPR_ARG (exp
, 0);
19821 arg1
= CALL_EXPR_ARG (exp
, 1);
19822 op0
= expand_normal (arg0
);
19823 op1
= expand_normal (arg1
);
19824 tmode
= insn_data
[icode
].operand
[0].mode
;
19825 mode1
= insn_data
[icode
].operand
[1].mode
;
19826 mode2
= insn_data
[icode
].operand
[2].mode
;
19828 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19829 op0
= copy_to_mode_reg (mode1
, op0
);
19830 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19832 /* @@@ better error message */
19833 error ("mask must be an immediate");
19837 || GET_MODE (target
) != tmode
19838 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19839 target
= gen_reg_rtx (tmode
);
19840 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19846 case ARM_BUILTIN_WSADB
:
19847 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
19848 case ARM_BUILTIN_WSADH
:
19849 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
19850 case ARM_BUILTIN_WSADBZ
:
19851 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
19852 case ARM_BUILTIN_WSADHZ
:
19853 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
19855 /* Several three-argument builtins. */
19856 case ARM_BUILTIN_WMACS
:
19857 case ARM_BUILTIN_WMACU
:
19858 case ARM_BUILTIN_WALIGN
:
19859 case ARM_BUILTIN_TMIA
:
19860 case ARM_BUILTIN_TMIAPH
:
19861 case ARM_BUILTIN_TMIATT
:
19862 case ARM_BUILTIN_TMIATB
:
19863 case ARM_BUILTIN_TMIABT
:
19864 case ARM_BUILTIN_TMIABB
:
19865 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
19866 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
19867 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
19868 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
19869 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
19870 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
19871 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
19872 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
19873 : CODE_FOR_iwmmxt_walign
);
19874 arg0
= CALL_EXPR_ARG (exp
, 0);
19875 arg1
= CALL_EXPR_ARG (exp
, 1);
19876 arg2
= CALL_EXPR_ARG (exp
, 2);
19877 op0
= expand_normal (arg0
);
19878 op1
= expand_normal (arg1
);
19879 op2
= expand_normal (arg2
);
19880 tmode
= insn_data
[icode
].operand
[0].mode
;
19881 mode0
= insn_data
[icode
].operand
[1].mode
;
19882 mode1
= insn_data
[icode
].operand
[2].mode
;
19883 mode2
= insn_data
[icode
].operand
[3].mode
;
19885 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19886 op0
= copy_to_mode_reg (mode0
, op0
);
19887 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19888 op1
= copy_to_mode_reg (mode1
, op1
);
19889 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19890 op2
= copy_to_mode_reg (mode2
, op2
);
19892 || GET_MODE (target
) != tmode
19893 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19894 target
= gen_reg_rtx (tmode
);
19895 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19901 case ARM_BUILTIN_WZERO
:
19902 target
= gen_reg_rtx (DImode
);
19903 emit_insn (gen_iwmmxt_clrdi (target
));
19906 case ARM_BUILTIN_THREAD_POINTER
:
19907 return arm_load_tp (target
);
19913 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19914 if (d
->code
== (const enum arm_builtins
) fcode
)
19915 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
19917 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19918 if (d
->code
== (const enum arm_builtins
) fcode
)
19919 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19921 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  MASK must be nonzero;
   the scan does not terminate for MASK == 0.  */

static int
number_of_first_bit_set (unsigned mask)
{
  int bit = 0;

  while ((mask & (1 << bit)) == 0)
    ++bit;

  return bit;
}
19941 /* Emit code to push or pop registers to or from the stack. F is the
19942 assembly file. MASK is the registers to push or pop. PUSH is
19943 nonzero if we should push, and zero if we should pop. For debugging
19944 output, if pushing, adjust CFA_OFFSET by the amount of space added
19945 to the stack. REAL_REGS should have the same number of bits set as
19946 MASK, and will be used instead (in the same order) to describe which
19947 registers were saved - this is used to mark the save slots when we
19948 push high registers after moving them to low registers. */
19950 thumb_pushpop (FILE *f
, unsigned long mask
, int push
, int *cfa_offset
,
19951 unsigned long real_regs
)
19954 int lo_mask
= mask
& 0xFF;
19955 int pushed_words
= 0;
19959 if (lo_mask
== 0 && !push
&& (mask
& (1 << PC_REGNUM
)))
19961 /* Special case. Do not generate a POP PC statement here, do it in
19963 thumb_exit (f
, -1);
19967 if (push
&& arm_except_unwind_info (&global_options
) == UI_TARGET
)
19969 fprintf (f
, "\t.save\t{");
19970 for (regno
= 0; regno
< 15; regno
++)
19972 if (real_regs
& (1 << regno
))
19974 if (real_regs
& ((1 << regno
) -1))
19976 asm_fprintf (f
, "%r", regno
);
19979 fprintf (f
, "}\n");
19982 fprintf (f
, "\t%s\t{", push
? "push" : "pop");
19984 /* Look at the low registers first. */
19985 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
19989 asm_fprintf (f
, "%r", regno
);
19991 if ((lo_mask
& ~1) != 0)
19998 if (push
&& (mask
& (1 << LR_REGNUM
)))
20000 /* Catch pushing the LR. */
20004 asm_fprintf (f
, "%r", LR_REGNUM
);
20008 else if (!push
&& (mask
& (1 << PC_REGNUM
)))
20010 /* Catch popping the PC. */
20011 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
20012 || crtl
->calls_eh_return
)
20014 /* The PC is never poped directly, instead
20015 it is popped into r3 and then BX is used. */
20016 fprintf (f
, "}\n");
20018 thumb_exit (f
, -1);
20027 asm_fprintf (f
, "%r", PC_REGNUM
);
20031 fprintf (f
, "}\n");
20033 if (push
&& pushed_words
&& dwarf2out_do_frame ())
20035 char *l
= dwarf2out_cfi_label (false);
20036 int pushed_mask
= real_regs
;
20038 *cfa_offset
+= pushed_words
* 4;
20039 dwarf2out_def_cfa (l
, SP_REGNUM
, *cfa_offset
);
20042 pushed_mask
= real_regs
;
20043 for (regno
= 0; regno
<= 14; regno
++, pushed_mask
>>= 1)
20045 if (pushed_mask
& 1)
20046 dwarf2out_reg_save (l
, regno
, 4 * pushed_words
++ - *cfa_offset
);
20051 /* Generate code to return from a thumb function.
20052 If 'reg_containing_return_addr' is -1, then the return address is
20053 actually on the stack, at the stack pointer. */
20055 thumb_exit (FILE *f
, int reg_containing_return_addr
)
20057 unsigned regs_available_for_popping
;
20058 unsigned regs_to_pop
;
20060 unsigned available
;
20064 int restore_a4
= FALSE
;
20066 /* Compute the registers we need to pop. */
20070 if (reg_containing_return_addr
== -1)
20072 regs_to_pop
|= 1 << LR_REGNUM
;
20076 if (TARGET_BACKTRACE
)
20078 /* Restore the (ARM) frame pointer and stack pointer. */
20079 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
20083 /* If there is nothing to pop then just emit the BX instruction and
20085 if (pops_needed
== 0)
20087 if (crtl
->calls_eh_return
)
20088 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20090 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20093 /* Otherwise if we are not supporting interworking and we have not created
20094 a backtrace structure and the function was not entered in ARM mode then
20095 just pop the return address straight into the PC. */
20096 else if (!TARGET_INTERWORK
20097 && !TARGET_BACKTRACE
20098 && !is_called_in_ARM_mode (current_function_decl
)
20099 && !crtl
->calls_eh_return
)
20101 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
20105 /* Find out how many of the (return) argument registers we can corrupt. */
20106 regs_available_for_popping
= 0;
20108 /* If returning via __builtin_eh_return, the bottom three registers
20109 all contain information needed for the return. */
20110 if (crtl
->calls_eh_return
)
20114 /* If we can deduce the registers used from the function's
20115 return value. This is more reliable that examining
20116 df_regs_ever_live_p () because that will be set if the register is
20117 ever used in the function, not just if the register is used
20118 to hold a return value. */
20120 if (crtl
->return_rtx
!= 0)
20121 mode
= GET_MODE (crtl
->return_rtx
);
20123 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20125 size
= GET_MODE_SIZE (mode
);
20129 /* In a void function we can use any argument register.
20130 In a function that returns a structure on the stack
20131 we can use the second and third argument registers. */
20132 if (mode
== VOIDmode
)
20133 regs_available_for_popping
=
20134 (1 << ARG_REGISTER (1))
20135 | (1 << ARG_REGISTER (2))
20136 | (1 << ARG_REGISTER (3));
20138 regs_available_for_popping
=
20139 (1 << ARG_REGISTER (2))
20140 | (1 << ARG_REGISTER (3));
20142 else if (size
<= 4)
20143 regs_available_for_popping
=
20144 (1 << ARG_REGISTER (2))
20145 | (1 << ARG_REGISTER (3));
20146 else if (size
<= 8)
20147 regs_available_for_popping
=
20148 (1 << ARG_REGISTER (3));
20151 /* Match registers to be popped with registers into which we pop them. */
20152 for (available
= regs_available_for_popping
,
20153 required
= regs_to_pop
;
20154 required
!= 0 && available
!= 0;
20155 available
&= ~(available
& - available
),
20156 required
&= ~(required
& - required
))
20159 /* If we have any popping registers left over, remove them. */
20161 regs_available_for_popping
&= ~available
;
20163 /* Otherwise if we need another popping register we can use
20164 the fourth argument register. */
20165 else if (pops_needed
)
20167 /* If we have not found any free argument registers and
20168 reg a4 contains the return address, we must move it. */
20169 if (regs_available_for_popping
== 0
20170 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
20172 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20173 reg_containing_return_addr
= LR_REGNUM
;
20175 else if (size
> 12)
20177 /* Register a4 is being used to hold part of the return value,
20178 but we have dire need of a free, low register. */
20181 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
20184 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
20186 /* The fourth argument register is available. */
20187 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
20193 /* Pop as many registers as we can. */
20194 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20195 regs_available_for_popping
);
20197 /* Process the registers we popped. */
20198 if (reg_containing_return_addr
== -1)
20200 /* The return address was popped into the lowest numbered register. */
20201 regs_to_pop
&= ~(1 << LR_REGNUM
);
20203 reg_containing_return_addr
=
20204 number_of_first_bit_set (regs_available_for_popping
);
20206 /* Remove this register for the mask of available registers, so that
20207 the return address will not be corrupted by further pops. */
20208 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
20211 /* If we popped other registers then handle them here. */
20212 if (regs_available_for_popping
)
20216 /* Work out which register currently contains the frame pointer. */
20217 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20219 /* Move it into the correct place. */
20220 asm_fprintf (f
, "\tmov\t%r, %r\n",
20221 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
20223 /* (Temporarily) remove it from the mask of popped registers. */
20224 regs_available_for_popping
&= ~(1 << frame_pointer
);
20225 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
20227 if (regs_available_for_popping
)
20231 /* We popped the stack pointer as well,
20232 find the register that contains it. */
20233 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20235 /* Move it into the stack register. */
20236 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
20238 /* At this point we have popped all necessary registers, so
20239 do not worry about restoring regs_available_for_popping
20240 to its correct value:
20242 assert (pops_needed == 0)
20243 assert (regs_available_for_popping == (1 << frame_pointer))
20244 assert (regs_to_pop == (1 << STACK_POINTER)) */
20248 /* Since we have just move the popped value into the frame
20249 pointer, the popping register is available for reuse, and
20250 we know that we still have the stack pointer left to pop. */
20251 regs_available_for_popping
|= (1 << frame_pointer
);
20255 /* If we still have registers left on the stack, but we no longer have
20256 any registers into which we can pop them, then we must move the return
20257 address into the link register and make available the register that
20259 if (regs_available_for_popping
== 0 && pops_needed
> 0)
20261 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
20263 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
20264 reg_containing_return_addr
);
20266 reg_containing_return_addr
= LR_REGNUM
;
20269 /* If we have registers left on the stack then pop some more.
20270 We know that at most we will want to pop FP and SP. */
20271 if (pops_needed
> 0)
20276 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20277 regs_available_for_popping
);
20279 /* We have popped either FP or SP.
20280 Move whichever one it is into the correct register. */
20281 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20282 move_to
= number_of_first_bit_set (regs_to_pop
);
20284 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
20286 regs_to_pop
&= ~(1 << move_to
);
20291 /* If we still have not popped everything then we must have only
20292 had one register available to us and we are now popping the SP. */
20293 if (pops_needed
> 0)
20297 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20298 regs_available_for_popping
);
20300 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20302 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
20304 assert (regs_to_pop == (1 << STACK_POINTER))
20305 assert (pops_needed == 1)
20309 /* If necessary restore the a4 register. */
20312 if (reg_containing_return_addr
!= LR_REGNUM
)
20314 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20315 reg_containing_return_addr
= LR_REGNUM
;
20318 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
20321 if (crtl
->calls_eh_return
)
20322 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20324 /* Return to caller. */
20325 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20328 /* Scan INSN just before assembler is output for it.
20329 For Thumb-1, we track the status of the condition codes; this
20330 information is used in the cbranchsi4_insn pattern. */
20332 thumb1_final_prescan_insn (rtx insn
)
20334 if (flag_print_asm_name
)
20335 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
20336 INSN_ADDRESSES (INSN_UID (insn
)));
20337 /* Don't overwrite the previous setter when we get to a cbranch. */
20338 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
20340 enum attr_conds conds
;
20342 if (cfun
->machine
->thumb1_cc_insn
)
20344 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
20345 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
20348 conds
= get_attr_conds (insn
);
20349 if (conds
== CONDS_SET
)
20351 rtx set
= single_set (insn
);
20352 cfun
->machine
->thumb1_cc_insn
= insn
;
20353 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
20354 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
20355 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
20356 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
20358 rtx src1
= XEXP (SET_SRC (set
), 1);
20359 if (src1
== const0_rtx
)
20360 cfun
->machine
->thumb1_cc_mode
= CCmode
;
20363 else if (conds
!= CONDS_NOCOND
)
20364 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
20369 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
20371 unsigned HOST_WIDE_INT mask
= 0xff;
20374 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
20375 if (val
== 0) /* XXX */
20378 for (i
= 0; i
< 25; i
++)
20379 if ((val
& (mask
<< i
)) == val
)
20385 /* Returns nonzero if the current function contains,
20386 or might contain a far jump. */
20388 thumb_far_jump_used_p (void)
20392 /* This test is only important for leaf functions. */
20393 /* assert (!leaf_function_p ()); */
20395 /* If we have already decided that far jumps may be used,
20396 do not bother checking again, and always return true even if
20397 it turns out that they are not being used. Once we have made
20398 the decision that far jumps are present (and that hence the link
20399 register will be pushed onto the stack) we cannot go back on it. */
20400 if (cfun
->machine
->far_jump_used
)
20403 /* If this function is not being called from the prologue/epilogue
20404 generation code then it must be being called from the
20405 INITIAL_ELIMINATION_OFFSET macro. */
20406 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
20408 /* In this case we know that we are being asked about the elimination
20409 of the arg pointer register. If that register is not being used,
20410 then there are no arguments on the stack, and we do not have to
20411 worry that a far jump might force the prologue to push the link
20412 register, changing the stack offsets. In this case we can just
20413 return false, since the presence of far jumps in the function will
20414 not affect stack offsets.
20416 If the arg pointer is live (or if it was live, but has now been
20417 eliminated and so set to dead) then we do have to test to see if
20418 the function might contain a far jump. This test can lead to some
20419 false negatives, since before reload is completed, then length of
20420 branch instructions is not known, so gcc defaults to returning their
20421 longest length, which in turn sets the far jump attribute to true.
20423 A false negative will not result in bad code being generated, but it
20424 will result in a needless push and pop of the link register. We
20425 hope that this does not occur too often.
20427 If we need doubleword stack alignment this could affect the other
20428 elimination offsets so we can't risk getting it wrong. */
20429 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
20430 cfun
->machine
->arg_pointer_live
= 1;
20431 else if (!cfun
->machine
->arg_pointer_live
)
20435 /* Check to see if the function contains a branch
20436 insn with the far jump attribute set. */
20437 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20439 if (GET_CODE (insn
) == JUMP_INSN
20440 /* Ignore tablejump patterns. */
20441 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20442 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
20443 && get_attr_far_jump (insn
) == FAR_JUMP_YES
20446 /* Record the fact that we have decided that
20447 the function does use far jumps. */
20448 cfun
->machine
->far_jump_used
= 1;
20456 /* Return nonzero if FUNC must be entered in ARM mode. */
20458 is_called_in_ARM_mode (tree func
)
20460 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
20462 /* Ignore the problem about functions whose address is taken. */
20463 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
20467 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
20473 /* Given the stack offsets and register mask in OFFSETS, decide how
20474 many additional registers to push instead of subtracting a constant
20475 from SP. For epilogues the principle is the same except we use pop.
20476 FOR_PROLOGUE indicates which we're generating. */
20478 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
20480 HOST_WIDE_INT amount
;
20481 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
20482 /* Extract a mask of the ones we can give to the Thumb's push/pop
20484 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
20485 /* Then count how many other high registers will need to be pushed. */
20486 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20487 int n_free
, reg_base
;
20489 if (!for_prologue
&& frame_pointer_needed
)
20490 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20492 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20494 /* If the stack frame size is 512 exactly, we can save one load
20495 instruction, which should make this a win even when optimizing
20497 if (!optimize_size
&& amount
!= 512)
20500 /* Can't do this if there are high registers to push. */
20501 if (high_regs_pushed
!= 0)
20504 /* Shouldn't do it in the prologue if no registers would normally
20505 be pushed at all. In the epilogue, also allow it if we'll have
20506 a pop insn for the PC. */
20509 || TARGET_BACKTRACE
20510 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
20511 || TARGET_INTERWORK
20512 || crtl
->args
.pretend_args_size
!= 0))
20515 /* Don't do this if thumb_expand_prologue wants to emit instructions
20516 between the push and the stack frame allocation. */
20518 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20519 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
20526 reg_base
= arm_size_return_regs () / UNITS_PER_WORD
;
20527 live_regs_mask
>>= reg_base
;
20530 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
20531 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
20533 live_regs_mask
>>= 1;
20539 gcc_assert (amount
/ 4 * 4 == amount
);
20541 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
20542 return (amount
- 508) / 4;
20543 if (amount
<= n_free
* 4)
20548 /* The bits which aren't usefully expanded as rtl. */
20550 thumb_unexpanded_epilogue (void)
20552 arm_stack_offsets
*offsets
;
20554 unsigned long live_regs_mask
= 0;
20555 int high_regs_pushed
= 0;
20557 int had_to_push_lr
;
20560 if (cfun
->machine
->return_used_this_function
!= 0)
20563 if (IS_NAKED (arm_current_func_type ()))
20566 offsets
= arm_get_frame_offsets ();
20567 live_regs_mask
= offsets
->saved_regs_mask
;
20568 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20570 /* If we can deduce the registers used from the function's return value.
20571 This is more reliable that examining df_regs_ever_live_p () because that
20572 will be set if the register is ever used in the function, not just if
20573 the register is used to hold a return value. */
20574 size
= arm_size_return_regs ();
20576 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
20579 unsigned long extra_mask
= (1 << extra_pop
) - 1;
20580 live_regs_mask
|= extra_mask
<< (size
/ UNITS_PER_WORD
);
20583 /* The prolog may have pushed some high registers to use as
20584 work registers. e.g. the testsuite file:
20585 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20586 compiles to produce:
20587 push {r4, r5, r6, r7, lr}
20591 as part of the prolog. We have to undo that pushing here. */
20593 if (high_regs_pushed
)
20595 unsigned long mask
= live_regs_mask
& 0xff;
20598 /* The available low registers depend on the size of the value we are
20606 /* Oh dear! We have no low registers into which we can pop
20609 ("no low registers available for popping high registers");
20611 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
20612 if (live_regs_mask
& (1 << next_hi_reg
))
20615 while (high_regs_pushed
)
20617 /* Find lo register(s) into which the high register(s) can
20619 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20621 if (mask
& (1 << regno
))
20622 high_regs_pushed
--;
20623 if (high_regs_pushed
== 0)
20627 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
20629 /* Pop the values into the low register(s). */
20630 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
20632 /* Move the value(s) into the high registers. */
20633 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20635 if (mask
& (1 << regno
))
20637 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
20640 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
20641 if (live_regs_mask
& (1 << next_hi_reg
))
20646 live_regs_mask
&= ~0x0f00;
20649 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
20650 live_regs_mask
&= 0xff;
20652 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
20654 /* Pop the return address into the PC. */
20655 if (had_to_push_lr
)
20656 live_regs_mask
|= 1 << PC_REGNUM
;
20658 /* Either no argument registers were pushed or a backtrace
20659 structure was created which includes an adjusted stack
20660 pointer, so just pop everything. */
20661 if (live_regs_mask
)
20662 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20665 /* We have either just popped the return address into the
20666 PC or it is was kept in LR for the entire function.
20667 Note that thumb_pushpop has already called thumb_exit if the
20668 PC was in the list. */
20669 if (!had_to_push_lr
)
20670 thumb_exit (asm_out_file
, LR_REGNUM
);
20674 /* Pop everything but the return address. */
20675 if (live_regs_mask
)
20676 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20679 if (had_to_push_lr
)
20683 /* We have no free low regs, so save one. */
20684 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
20688 /* Get the return address into a temporary register. */
20689 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
20690 1 << LAST_ARG_REGNUM
);
20694 /* Move the return address to lr. */
20695 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
20697 /* Restore the low register. */
20698 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
20703 regno
= LAST_ARG_REGNUM
;
20708 /* Remove the argument registers that were pushed onto the stack. */
20709 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
20710 SP_REGNUM
, SP_REGNUM
,
20711 crtl
->args
.pretend_args_size
);
20713 thumb_exit (asm_out_file
, regno
);
20719 /* Functions to save and restore machine-specific function data. */
20720 static struct machine_function
*
20721 arm_init_machine_status (void)
20723 struct machine_function
*machine
;
20724 machine
= ggc_alloc_cleared_machine_function ();
20726 #if ARM_FT_UNKNOWN != 0
20727 machine
->func_type
= ARM_FT_UNKNOWN
;
20732 /* Return an RTX indicating where the return address to the
20733 calling function can be found. */
20735 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
20740 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
20743 /* Do anything needed before RTL is emitted for each function. */
20745 arm_init_expanders (void)
20747 /* Arrange to initialize and mark the machine per-function status. */
20748 init_machine_status
= arm_init_machine_status
;
20750 /* This is to stop the combine pass optimizing away the alignment
20751 adjustment of va_arg. */
20752 /* ??? It is claimed that this should not be necessary. */
20754 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
20758 /* Like arm_compute_initial_elimination offset. Simpler because there
20759 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20760 to point at the base of the local variables after static stack
20761 space for a function has been allocated. */
20764 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20766 arm_stack_offsets
*offsets
;
20768 offsets
= arm_get_frame_offsets ();
20772 case ARG_POINTER_REGNUM
:
20775 case STACK_POINTER_REGNUM
:
20776 return offsets
->outgoing_args
- offsets
->saved_args
;
20778 case FRAME_POINTER_REGNUM
:
20779 return offsets
->soft_frame
- offsets
->saved_args
;
20781 case ARM_HARD_FRAME_POINTER_REGNUM
:
20782 return offsets
->saved_regs
- offsets
->saved_args
;
20784 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20785 return offsets
->locals_base
- offsets
->saved_args
;
20788 gcc_unreachable ();
20792 case FRAME_POINTER_REGNUM
:
20795 case STACK_POINTER_REGNUM
:
20796 return offsets
->outgoing_args
- offsets
->soft_frame
;
20798 case ARM_HARD_FRAME_POINTER_REGNUM
:
20799 return offsets
->saved_regs
- offsets
->soft_frame
;
20801 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20802 return offsets
->locals_base
- offsets
->soft_frame
;
20805 gcc_unreachable ();
20810 gcc_unreachable ();
20814 /* Generate the rest of a function's prologue. */
20816 thumb1_expand_prologue (void)
20820 HOST_WIDE_INT amount
;
20821 arm_stack_offsets
*offsets
;
20822 unsigned long func_type
;
20824 unsigned long live_regs_mask
;
20826 func_type
= arm_current_func_type ();
20828 /* Naked functions don't have prologues. */
20829 if (IS_NAKED (func_type
))
20832 if (IS_INTERRUPT (func_type
))
20834 error ("interrupt Service Routines cannot be coded in Thumb mode");
20838 offsets
= arm_get_frame_offsets ();
20839 live_regs_mask
= offsets
->saved_regs_mask
;
20840 /* Load the pic register before setting the frame pointer,
20841 so we can use r7 as a temporary work register. */
20842 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20843 arm_load_pic_register (live_regs_mask
);
20845 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
20846 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
20847 stack_pointer_rtx
);
20849 if (flag_stack_usage_info
)
20850 current_function_static_stack_size
20851 = offsets
->outgoing_args
- offsets
->saved_args
;
20853 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20854 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
20859 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20860 GEN_INT (- amount
)));
20861 RTX_FRAME_RELATED_P (insn
) = 1;
20867 /* The stack decrement is too big for an immediate value in a single
20868 insn. In theory we could issue multiple subtracts, but after
20869 three of them it becomes more space efficient to place the full
20870 value in the constant pool and load into a register. (Also the
20871 ARM debugger really likes to see only one stack decrement per
20872 function). So instead we look for a scratch register into which
20873 we can load the decrement, and then we subtract this from the
20874 stack pointer. Unfortunately on the thumb the only available
20875 scratch registers are the argument registers, and we cannot use
20876 these as they may hold arguments to the function. Instead we
20877 attempt to locate a call preserved register which is used by this
20878 function. If we can find one, then we know that it will have
20879 been pushed at the start of the prologue and so we can corrupt
20881 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
20882 if (live_regs_mask
& (1 << regno
))
20885 gcc_assert(regno
<= LAST_LO_REGNUM
);
20887 reg
= gen_rtx_REG (SImode
, regno
);
20889 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
20891 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
20892 stack_pointer_rtx
, reg
));
20893 RTX_FRAME_RELATED_P (insn
) = 1;
20894 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20895 plus_constant (stack_pointer_rtx
,
20897 RTX_FRAME_RELATED_P (dwarf
) = 1;
20898 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20902 if (frame_pointer_needed
)
20903 thumb_set_frame_pointer (offsets
);
20905 /* If we are profiling, make sure no instructions are scheduled before
20906 the call to mcount. Similarly if the user has requested no
20907 scheduling in the prolog. Similarly if we want non-call exceptions
20908 using the EABI unwinder, to prevent faulting instructions from being
20909 swapped with a stack adjustment. */
20910 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20911 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20912 && cfun
->can_throw_non_call_exceptions
))
20913 emit_insn (gen_blockage ());
20915 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
20916 if (live_regs_mask
& 0xff)
20917 cfun
->machine
->lr_save_eliminated
= 0;
20922 thumb1_expand_epilogue (void)
20924 HOST_WIDE_INT amount
;
20925 arm_stack_offsets
*offsets
;
20928 /* Naked functions don't have prologues. */
20929 if (IS_NAKED (arm_current_func_type ()))
20932 offsets
= arm_get_frame_offsets ();
20933 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20935 if (frame_pointer_needed
)
20937 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
20938 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20940 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
20942 gcc_assert (amount
>= 0);
20946 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20947 GEN_INT (amount
)));
20950 /* r3 is always free in the epilogue. */
20951 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
20953 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
20954 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
20958 /* Emit a USE (stack_pointer_rtx), so that
20959 the stack adjustment will not be deleted. */
20960 emit_insn (gen_prologue_use (stack_pointer_rtx
));
20962 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
20963 emit_insn (gen_blockage ());
20965 /* Emit a clobber for each insn that will be restored in the epilogue,
20966 so that flow2 will get register lifetimes correct. */
20967 for (regno
= 0; regno
< 13; regno
++)
20968 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
20969 emit_clobber (gen_rtx_REG (SImode
, regno
));
20971 if (! df_regs_ever_live_p (LR_REGNUM
))
20972 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
20976 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
20978 arm_stack_offsets
*offsets
;
20979 unsigned long live_regs_mask
= 0;
20980 unsigned long l_mask
;
20981 unsigned high_regs_pushed
= 0;
20982 int cfa_offset
= 0;
20985 if (IS_NAKED (arm_current_func_type ()))
20988 if (is_called_in_ARM_mode (current_function_decl
))
20992 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
20993 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
20995 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
20997 /* Generate code sequence to switch us into Thumb mode. */
20998 /* The .code 32 directive has already been emitted by
20999 ASM_DECLARE_FUNCTION_NAME. */
21000 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
21001 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
21003 /* Generate a label, so that the debugger will notice the
21004 change in instruction sets. This label is also used by
21005 the assembler to bypass the ARM code when this function
21006 is called from a Thumb encoded function elsewhere in the
21007 same file. Hence the definition of STUB_NAME here must
21008 agree with the definition in gas/config/tc-arm.c. */
21010 #define STUB_NAME ".real_start_of"
21012 fprintf (f
, "\t.code\t16\n");
21014 if (arm_dllexport_name_p (name
))
21015 name
= arm_strip_name_encoding (name
);
21017 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
21018 fprintf (f
, "\t.thumb_func\n");
21019 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
21022 if (crtl
->args
.pretend_args_size
)
21024 /* Output unwind directive for the stack adjustment. */
21025 if (arm_except_unwind_info (&global_options
) == UI_TARGET
)
21026 fprintf (f
, "\t.pad #%d\n",
21027 crtl
->args
.pretend_args_size
);
21029 if (cfun
->machine
->uses_anonymous_args
)
21033 fprintf (f
, "\tpush\t{");
21035 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
21037 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
21038 regno
<= LAST_ARG_REGNUM
;
21040 asm_fprintf (f
, "%r%s", regno
,
21041 regno
== LAST_ARG_REGNUM
? "" : ", ");
21043 fprintf (f
, "}\n");
21046 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
21047 SP_REGNUM
, SP_REGNUM
,
21048 crtl
->args
.pretend_args_size
);
21050 /* We don't need to record the stores for unwinding (would it
21051 help the debugger any if we did?), but record the change in
21052 the stack pointer. */
21053 if (dwarf2out_do_frame ())
21055 char *l
= dwarf2out_cfi_label (false);
21057 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
21058 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
21062 /* Get the registers we are going to push. */
21063 offsets
= arm_get_frame_offsets ();
21064 live_regs_mask
= offsets
->saved_regs_mask
;
21065 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21066 l_mask
= live_regs_mask
& 0x40ff;
21067 /* Then count how many other high registers will need to be pushed. */
21068 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
21070 if (TARGET_BACKTRACE
)
21073 unsigned work_register
;
21075 /* We have been asked to create a stack backtrace structure.
21076 The code looks like this:
21080 0 sub SP, #16 Reserve space for 4 registers.
21081 2 push {R7} Push low registers.
21082 4 add R7, SP, #20 Get the stack pointer before the push.
21083 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
21084 8 mov R7, PC Get hold of the start of this code plus 12.
21085 10 str R7, [SP, #16] Store it.
21086 12 mov R7, FP Get hold of the current frame pointer.
21087 14 str R7, [SP, #4] Store it.
21088 16 mov R7, LR Get hold of the current return address.
21089 18 str R7, [SP, #12] Store it.
21090 20 add R7, SP, #16 Point at the start of the backtrace structure.
21091 22 mov FP, R7 Put this value into the frame pointer. */
21093 work_register
= thumb_find_work_register (live_regs_mask
);
21095 if (arm_except_unwind_info (&global_options
) == UI_TARGET
)
21096 asm_fprintf (f
, "\t.pad #16\n");
21099 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
21100 SP_REGNUM
, SP_REGNUM
);
21102 if (dwarf2out_do_frame ())
21104 char *l
= dwarf2out_cfi_label (false);
21106 cfa_offset
= cfa_offset
+ 16;
21107 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
21112 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
21113 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
21118 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
21119 offset
+ 16 + crtl
->args
.pretend_args_size
);
21121 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21124 /* Make sure that the instruction fetching the PC is in the right place
21125 to calculate "start of backtrace creation code + 12". */
21128 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
21129 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21131 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
21132 ARM_HARD_FRAME_POINTER_REGNUM
);
21133 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21138 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
21139 ARM_HARD_FRAME_POINTER_REGNUM
);
21140 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21142 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
21143 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21147 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
21148 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21150 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
21152 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21153 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
21155 /* Optimization: If we are not pushing any low registers but we are going
21156 to push some high registers then delay our first push. This will just
21157 be a push of LR and we can combine it with the push of the first high
21159 else if ((l_mask
& 0xff) != 0
21160 || (high_regs_pushed
== 0 && l_mask
))
21162 unsigned long mask
= l_mask
;
21163 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
21164 thumb_pushpop (f
, mask
, 1, &cfa_offset
, mask
);
21167 if (high_regs_pushed
)
21169 unsigned pushable_regs
;
21170 unsigned next_hi_reg
;
21172 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
21173 if (live_regs_mask
& (1 << next_hi_reg
))
21176 pushable_regs
= l_mask
& 0xff;
21178 if (pushable_regs
== 0)
21179 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
21181 while (high_regs_pushed
> 0)
21183 unsigned long real_regs_mask
= 0;
21185 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
21187 if (pushable_regs
& (1 << regno
))
21189 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
21191 high_regs_pushed
--;
21192 real_regs_mask
|= (1 << next_hi_reg
);
21194 if (high_regs_pushed
)
21196 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
21198 if (live_regs_mask
& (1 << next_hi_reg
))
21203 pushable_regs
&= ~((1 << regno
) - 1);
21209 /* If we had to find a work register and we have not yet
21210 saved the LR then add it to the list of regs to push. */
21211 if (l_mask
== (1 << LR_REGNUM
))
21213 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
21215 real_regs_mask
| (1 << LR_REGNUM
));
21219 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
21224 /* Handle the case of a double word load into a low register from
21225 a computed memory address. The computed address may involve a
21226 register which is overwritten by the load. */
21228 thumb_load_double_from_address (rtx
*operands
)
21236 gcc_assert (GET_CODE (operands
[0]) == REG
);
21237 gcc_assert (GET_CODE (operands
[1]) == MEM
);
21239 /* Get the memory address. */
21240 addr
= XEXP (operands
[1], 0);
21242 /* Work out how the memory address is computed. */
21243 switch (GET_CODE (addr
))
21246 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21248 if (REGNO (operands
[0]) == REGNO (addr
))
21250 output_asm_insn ("ldr\t%H0, %2", operands
);
21251 output_asm_insn ("ldr\t%0, %1", operands
);
21255 output_asm_insn ("ldr\t%0, %1", operands
);
21256 output_asm_insn ("ldr\t%H0, %2", operands
);
21261 /* Compute <address> + 4 for the high order load. */
21262 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21264 output_asm_insn ("ldr\t%0, %1", operands
);
21265 output_asm_insn ("ldr\t%H0, %2", operands
);
21269 arg1
= XEXP (addr
, 0);
21270 arg2
= XEXP (addr
, 1);
21272 if (CONSTANT_P (arg1
))
21273 base
= arg2
, offset
= arg1
;
21275 base
= arg1
, offset
= arg2
;
21277 gcc_assert (GET_CODE (base
) == REG
);
21279 /* Catch the case of <address> = <reg> + <reg> */
21280 if (GET_CODE (offset
) == REG
)
21282 int reg_offset
= REGNO (offset
);
21283 int reg_base
= REGNO (base
);
21284 int reg_dest
= REGNO (operands
[0]);
21286 /* Add the base and offset registers together into the
21287 higher destination register. */
21288 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
21289 reg_dest
+ 1, reg_base
, reg_offset
);
21291 /* Load the lower destination register from the address in
21292 the higher destination register. */
21293 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
21294 reg_dest
, reg_dest
+ 1);
21296 /* Load the higher destination register from its own address
21298 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
21299 reg_dest
+ 1, reg_dest
+ 1);
21303 /* Compute <address> + 4 for the high order load. */
21304 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21306 /* If the computed address is held in the low order register
21307 then load the high order register first, otherwise always
21308 load the low order register first. */
21309 if (REGNO (operands
[0]) == REGNO (base
))
21311 output_asm_insn ("ldr\t%H0, %2", operands
);
21312 output_asm_insn ("ldr\t%0, %1", operands
);
21316 output_asm_insn ("ldr\t%0, %1", operands
);
21317 output_asm_insn ("ldr\t%H0, %2", operands
);
21323 /* With no registers to worry about we can just load the value
21325 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21327 output_asm_insn ("ldr\t%H0, %2", operands
);
21328 output_asm_insn ("ldr\t%0, %1", operands
);
21332 gcc_unreachable ();
21339 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
21346 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21349 operands
[4] = operands
[5];
21352 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
21353 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
21357 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21360 operands
[4] = operands
[5];
21363 if (REGNO (operands
[5]) > REGNO (operands
[6]))
21366 operands
[5] = operands
[6];
21369 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21372 operands
[4] = operands
[5];
21376 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
21377 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
21381 gcc_unreachable ();
21387 /* Output a call-via instruction for thumb state. */
21389 thumb_call_via_reg (rtx reg
)
21391 int regno
= REGNO (reg
);
21394 gcc_assert (regno
< LR_REGNUM
);
21396 /* If we are in the normal text section we can use a single instance
21397 per compilation unit. If we are doing function sections, then we need
21398 an entry per section, since we can't rely on reachability. */
21399 if (in_section
== text_section
)
21401 thumb_call_reg_needed
= 1;
21403 if (thumb_call_via_label
[regno
] == NULL
)
21404 thumb_call_via_label
[regno
] = gen_label_rtx ();
21405 labelp
= thumb_call_via_label
+ regno
;
21409 if (cfun
->machine
->call_via
[regno
] == NULL
)
21410 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
21411 labelp
= cfun
->machine
->call_via
+ regno
;
21414 output_asm_insn ("bl\t%a0", labelp
);
21418 /* Routines for generating rtl. */
21420 thumb_expand_movmemqi (rtx
*operands
)
21422 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
21423 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
21424 HOST_WIDE_INT len
= INTVAL (operands
[2]);
21425 HOST_WIDE_INT offset
= 0;
21429 emit_insn (gen_movmem12b (out
, in
, out
, in
));
21435 emit_insn (gen_movmem8b (out
, in
, out
, in
));
21441 rtx reg
= gen_reg_rtx (SImode
);
21442 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
21443 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
21450 rtx reg
= gen_reg_rtx (HImode
);
21451 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
21452 plus_constant (in
, offset
))));
21453 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (out
, offset
)),
21461 rtx reg
= gen_reg_rtx (QImode
);
21462 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
21463 plus_constant (in
, offset
))));
21464 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (out
, offset
)),
21470 thumb_reload_out_hi (rtx
*operands
)
21472 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
21475 /* Handle reading a half-word from memory during reload. */
21477 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
21479 gcc_unreachable ();
21482 /* Return the length of a function name prefix
21483 that starts with the character 'c'. */
21485 arm_get_strip_length (int c
)
21489 ARM_NAME_ENCODING_LENGTHS
21494 /* Return a pointer to a function's name with any
21495 and all prefix encodings stripped from it. */
21497 arm_strip_name_encoding (const char *name
)
21501 while ((skip
= arm_get_strip_length (* name
)))
21507 /* If there is a '*' anywhere in the name's prefix, then
21508 emit the stripped name verbatim, otherwise prepend an
21509 underscore if leading underscores are being used. */
21511 arm_asm_output_labelref (FILE *stream
, const char *name
)
21516 while ((skip
= arm_get_strip_length (* name
)))
21518 verbatim
|= (*name
== '*');
21523 fputs (name
, stream
);
21525 asm_fprintf (stream
, "%U%s", name
);
21529 arm_file_start (void)
21533 if (TARGET_UNIFIED_ASM
)
21534 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
21538 const char *fpu_name
;
21539 if (arm_selected_arch
)
21540 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
21542 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
21544 if (TARGET_SOFT_FLOAT
)
21547 fpu_name
= "softvfp";
21549 fpu_name
= "softfpa";
21553 fpu_name
= arm_fpu_desc
->name
;
21554 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
21556 if (TARGET_HARD_FLOAT
)
21557 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
21558 if (TARGET_HARD_FLOAT_ABI
)
21559 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
21562 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
21564 /* Some of these attributes only apply when the corresponding features
21565 are used. However we don't have any easy way of figuring this out.
21566 Conservatively record the setting that would have been used. */
21568 /* Tag_ABI_FP_rounding. */
21569 if (flag_rounding_math
)
21570 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
21571 if (!flag_unsafe_math_optimizations
)
21573 /* Tag_ABI_FP_denormal. */
21574 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
21575 /* Tag_ABI_FP_exceptions. */
21576 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
21578 /* Tag_ABI_FP_user_exceptions. */
21579 if (flag_signaling_nans
)
21580 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
21581 /* Tag_ABI_FP_number_model. */
21582 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
21583 flag_finite_math_only
? 1 : 3);
21585 /* Tag_ABI_align8_needed. */
21586 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
21587 /* Tag_ABI_align8_preserved. */
21588 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
21589 /* Tag_ABI_enum_size. */
21590 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
21591 flag_short_enums
? 1 : 2);
21593 /* Tag_ABI_optimization_goals. */
21596 else if (optimize
>= 2)
21602 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
21604 /* Tag_ABI_FP_16bit_format. */
21605 if (arm_fp16_format
)
21606 asm_fprintf (asm_out_file
, "\t.eabi_attribute 38, %d\n",
21607 (int)arm_fp16_format
);
21609 if (arm_lang_output_object_attributes_hook
)
21610 arm_lang_output_object_attributes_hook();
21612 default_file_start();
21616 arm_file_end (void)
21620 if (NEED_INDICATE_EXEC_STACK
)
21621 /* Add .note.GNU-stack. */
21622 file_end_indicate_exec_stack ();
21624 if (! thumb_call_reg_needed
)
21627 switch_to_section (text_section
);
21628 asm_fprintf (asm_out_file
, "\t.code 16\n");
21629 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
21631 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
21633 rtx label
= thumb_call_via_label
[regno
];
21637 targetm
.asm_out
.internal_label (asm_out_file
, "L",
21638 CODE_LABEL_NUMBER (label
));
21639 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
21645 /* Symbols in the text segment can be accessed without indirecting via the
21646 constant pool; it may take an extra binary operation, but this is still
21647 faster than indirecting via memory. Don't do this when not optimizing,
21648 since we won't be calculating all of the offsets necessary to do this
21652 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
21654 if (optimize
> 0 && TREE_CONSTANT (decl
))
21655 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
21657 default_encode_section_info (decl
, rtl
, first
);
21659 #endif /* !ARM_PE */
21662 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
21664 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
21665 && !strcmp (prefix
, "L"))
21667 arm_ccfsm_state
= 0;
21668 arm_target_insn
= NULL
;
21670 default_internal_label (stream
, prefix
, labelno
);
21673 /* Output code to add DELTA to the first argument, and then jump
21674 to FUNCTION. Used for C++ multiple inheritance. */
21676 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
21677 HOST_WIDE_INT delta
,
21678 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
21681 static int thunk_label
= 0;
21684 int mi_delta
= delta
;
21685 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
21687 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
21690 mi_delta
= - mi_delta
;
21694 int labelno
= thunk_label
++;
21695 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
21696 /* Thunks are entered in arm mode when available. */
21697 if (TARGET_THUMB1_ONLY
)
21699 /* push r3 so we can use it as a temporary. */
21700 /* TODO: Omit this save if r3 is not used. */
21701 fputs ("\tpush {r3}\n", file
);
21702 fputs ("\tldr\tr3, ", file
);
21706 fputs ("\tldr\tr12, ", file
);
21708 assemble_name (file
, label
);
21709 fputc ('\n', file
);
21712 /* If we are generating PIC, the ldr instruction below loads
21713 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21714 the address of the add + 8, so we have:
21716 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21719 Note that we have "+ 1" because some versions of GNU ld
21720 don't set the low bit of the result for R_ARM_REL32
21721 relocations against thumb function symbols.
21722 On ARMv6M this is +4, not +8. */
21723 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
21724 assemble_name (file
, labelpc
);
21725 fputs (":\n", file
);
21726 if (TARGET_THUMB1_ONLY
)
21728 /* This is 2 insns after the start of the thunk, so we know it
21729 is 4-byte aligned. */
21730 fputs ("\tadd\tr3, pc, r3\n", file
);
21731 fputs ("\tmov r12, r3\n", file
);
21734 fputs ("\tadd\tr12, pc, r12\n", file
);
21736 else if (TARGET_THUMB1_ONLY
)
21737 fputs ("\tmov r12, r3\n", file
);
21739 if (TARGET_THUMB1_ONLY
)
21741 if (mi_delta
> 255)
21743 fputs ("\tldr\tr3, ", file
);
21744 assemble_name (file
, label
);
21745 fputs ("+4\n", file
);
21746 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
21747 mi_op
, this_regno
, this_regno
);
21749 else if (mi_delta
!= 0)
21751 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
21752 mi_op
, this_regno
, this_regno
,
21758 /* TODO: Use movw/movt for large constants when available. */
21759 while (mi_delta
!= 0)
21761 if ((mi_delta
& (3 << shift
)) == 0)
21765 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
21766 mi_op
, this_regno
, this_regno
,
21767 mi_delta
& (0xff << shift
));
21768 mi_delta
&= ~(0xff << shift
);
21775 if (TARGET_THUMB1_ONLY
)
21776 fputs ("\tpop\t{r3}\n", file
);
21778 fprintf (file
, "\tbx\tr12\n");
21779 ASM_OUTPUT_ALIGN (file
, 2);
21780 assemble_name (file
, label
);
21781 fputs (":\n", file
);
21784 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21785 rtx tem
= XEXP (DECL_RTL (function
), 0);
21786 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
21787 tem
= gen_rtx_MINUS (GET_MODE (tem
),
21789 gen_rtx_SYMBOL_REF (Pmode
,
21790 ggc_strdup (labelpc
)));
21791 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
21794 /* Output ".word .LTHUNKn". */
21795 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
21797 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
21798 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
21802 fputs ("\tb\t", file
);
21803 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
21804 if (NEED_PLT_RELOC
)
21805 fputs ("(PLT)", file
);
21806 fputc ('\n', file
);
21811 arm_emit_vector_const (FILE *file
, rtx x
)
21814 const char * pattern
;
21816 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21818 switch (GET_MODE (x
))
21820 case V2SImode
: pattern
= "%08x"; break;
21821 case V4HImode
: pattern
= "%04x"; break;
21822 case V8QImode
: pattern
= "%02x"; break;
21823 default: gcc_unreachable ();
21826 fprintf (file
, "0x");
21827 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
21831 element
= CONST_VECTOR_ELT (x
, i
);
21832 fprintf (file
, pattern
, INTVAL (element
));
21838 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21839 HFmode constant pool entries are actually loaded with ldr. */
21841 arm_emit_fp16_const (rtx c
)
21846 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
21847 bits
= real_to_target (NULL
, &r
, HFmode
);
21848 if (WORDS_BIG_ENDIAN
)
21849 assemble_zeros (2);
21850 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
21851 if (!WORDS_BIG_ENDIAN
)
21852 assemble_zeros (2);
21856 arm_output_load_gr (rtx
*operands
)
21863 if (GET_CODE (operands
[1]) != MEM
21864 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
21865 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
21866 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
21867 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
21868 return "wldrw%?\t%0, %1";
21870 /* Fix up an out-of-range load of a GR register. */
21871 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
21872 wcgr
= operands
[0];
21874 output_asm_insn ("ldr%?\t%0, %1", operands
);
21876 operands
[0] = wcgr
;
21878 output_asm_insn ("tmcr%?\t%0, %1", operands
);
21879 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
21884 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21886 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21887 named arg and all anonymous args onto the stack.
21888 XXX I know the prologue shouldn't be pushing registers, but it is faster
21892 arm_setup_incoming_varargs (CUMULATIVE_ARGS
*pcum
,
21893 enum machine_mode mode
,
21896 int second_time ATTRIBUTE_UNUSED
)
21900 cfun
->machine
->uses_anonymous_args
= 1;
21901 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
21903 nregs
= pcum
->aapcs_ncrn
;
21904 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
21908 nregs
= pcum
->nregs
;
21910 if (nregs
< NUM_ARG_REGS
)
21911 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
21914 /* Return nonzero if the CONSUMER instruction (a store) does not need
21915 PRODUCER's value to calculate the address. */
21918 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
21920 rtx value
= PATTERN (producer
);
21921 rtx addr
= PATTERN (consumer
);
21923 if (GET_CODE (value
) == COND_EXEC
)
21924 value
= COND_EXEC_CODE (value
);
21925 if (GET_CODE (value
) == PARALLEL
)
21926 value
= XVECEXP (value
, 0, 0);
21927 value
= XEXP (value
, 0);
21928 if (GET_CODE (addr
) == COND_EXEC
)
21929 addr
= COND_EXEC_CODE (addr
);
21930 if (GET_CODE (addr
) == PARALLEL
)
21931 addr
= XVECEXP (addr
, 0, 0);
21932 addr
= XEXP (addr
, 0);
21934 return !reg_overlap_mentioned_p (value
, addr
);
21937 /* Return nonzero if the CONSUMER instruction (a store) does need
21938 PRODUCER's value to calculate the address. */
21941 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
21943 return !arm_no_early_store_addr_dep (producer
, consumer
);
21946 /* Return nonzero if the CONSUMER instruction (a load) does need
21947 PRODUCER's value to calculate the address. */
21950 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
21952 rtx value
= PATTERN (producer
);
21953 rtx addr
= PATTERN (consumer
);
21955 if (GET_CODE (value
) == COND_EXEC
)
21956 value
= COND_EXEC_CODE (value
);
21957 if (GET_CODE (value
) == PARALLEL
)
21958 value
= XVECEXP (value
, 0, 0);
21959 value
= XEXP (value
, 0);
21960 if (GET_CODE (addr
) == COND_EXEC
)
21961 addr
= COND_EXEC_CODE (addr
);
21962 if (GET_CODE (addr
) == PARALLEL
)
21963 addr
= XVECEXP (addr
, 0, 0);
21964 addr
= XEXP (addr
, 1);
21966 return reg_overlap_mentioned_p (value
, addr
);
21969 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21970 have an early register shift value or amount dependency on the
21971 result of PRODUCER. */
21974 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
21976 rtx value
= PATTERN (producer
);
21977 rtx op
= PATTERN (consumer
);
21980 if (GET_CODE (value
) == COND_EXEC
)
21981 value
= COND_EXEC_CODE (value
);
21982 if (GET_CODE (value
) == PARALLEL
)
21983 value
= XVECEXP (value
, 0, 0);
21984 value
= XEXP (value
, 0);
21985 if (GET_CODE (op
) == COND_EXEC
)
21986 op
= COND_EXEC_CODE (op
);
21987 if (GET_CODE (op
) == PARALLEL
)
21988 op
= XVECEXP (op
, 0, 0);
21991 early_op
= XEXP (op
, 0);
21992 /* This is either an actual independent shift, or a shift applied to
21993 the first operand of another operation. We want the whole shift
21995 if (GET_CODE (early_op
) == REG
)
21998 return !reg_overlap_mentioned_p (value
, early_op
);
22001 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22002 have an early register shift value dependency on the result of
22006 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
22008 rtx value
= PATTERN (producer
);
22009 rtx op
= PATTERN (consumer
);
22012 if (GET_CODE (value
) == COND_EXEC
)
22013 value
= COND_EXEC_CODE (value
);
22014 if (GET_CODE (value
) == PARALLEL
)
22015 value
= XVECEXP (value
, 0, 0);
22016 value
= XEXP (value
, 0);
22017 if (GET_CODE (op
) == COND_EXEC
)
22018 op
= COND_EXEC_CODE (op
);
22019 if (GET_CODE (op
) == PARALLEL
)
22020 op
= XVECEXP (op
, 0, 0);
22023 early_op
= XEXP (op
, 0);
22025 /* This is either an actual independent shift, or a shift applied to
22026 the first operand of another operation. We want the value being
22027 shifted, in either case. */
22028 if (GET_CODE (early_op
) != REG
)
22029 early_op
= XEXP (early_op
, 0);
22031 return !reg_overlap_mentioned_p (value
, early_op
);
22034 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22035 have an early register mult dependency on the result of
22039 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
22041 rtx value
= PATTERN (producer
);
22042 rtx op
= PATTERN (consumer
);
22044 if (GET_CODE (value
) == COND_EXEC
)
22045 value
= COND_EXEC_CODE (value
);
22046 if (GET_CODE (value
) == PARALLEL
)
22047 value
= XVECEXP (value
, 0, 0);
22048 value
= XEXP (value
, 0);
22049 if (GET_CODE (op
) == COND_EXEC
)
22050 op
= COND_EXEC_CODE (op
);
22051 if (GET_CODE (op
) == PARALLEL
)
22052 op
= XVECEXP (op
, 0, 0);
22055 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
22057 if (GET_CODE (XEXP (op
, 0)) == MULT
)
22058 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
22060 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
22066 /* We can't rely on the caller doing the proper promotion when
22067 using APCS or ATPCS. */
22070 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
22072 return !TARGET_AAPCS_BASED
;
22075 static enum machine_mode
22076 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
22077 enum machine_mode mode
,
22078 int *punsignedp ATTRIBUTE_UNUSED
,
22079 const_tree fntype ATTRIBUTE_UNUSED
,
22080 int for_return ATTRIBUTE_UNUSED
)
22082 if (GET_MODE_CLASS (mode
) == MODE_INT
22083 && GET_MODE_SIZE (mode
) < 4)
22089 /* AAPCS based ABIs use short enums by default. */
22092 arm_default_short_enums (void)
22094 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
22098 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22101 arm_align_anon_bitfield (void)
22103 return TARGET_AAPCS_BASED
;
22107 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22110 arm_cxx_guard_type (void)
22112 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
22115 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22116 has an accumulator dependency on the result of the producer (a
22117 multiplication instruction) and no other dependency on that result. */
22119 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
22121 rtx mul
= PATTERN (producer
);
22122 rtx mac
= PATTERN (consumer
);
22124 rtx mac_op0
, mac_op1
, mac_acc
;
22126 if (GET_CODE (mul
) == COND_EXEC
)
22127 mul
= COND_EXEC_CODE (mul
);
22128 if (GET_CODE (mac
) == COND_EXEC
)
22129 mac
= COND_EXEC_CODE (mac
);
22131 /* Check that mul is of the form (set (...) (mult ...))
22132 and mla is of the form (set (...) (plus (mult ...) (...))). */
22133 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
22134 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
22135 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
22138 mul_result
= XEXP (mul
, 0);
22139 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
22140 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
22141 mac_acc
= XEXP (XEXP (mac
, 1), 1);
22143 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
22144 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
22145 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
22149 /* The EABI says test the least significant bit of a guard variable. */
22152 arm_cxx_guard_mask_bit (void)
22154 return TARGET_AAPCS_BASED
;
22158 /* The EABI specifies that all array cookies are 8 bytes long. */
22161 arm_get_cookie_size (tree type
)
22165 if (!TARGET_AAPCS_BASED
)
22166 return default_cxx_get_cookie_size (type
);
22168 size
= build_int_cst (sizetype
, 8);
22173 /* The EABI says that array cookies should also contain the element size. */
22176 arm_cookie_has_size (void)
22178 return TARGET_AAPCS_BASED
;
22182 /* The EABI says constructors and destructors should return a pointer to
22183 the object constructed/destroyed. */
22186 arm_cxx_cdtor_returns_this (void)
22188 return TARGET_AAPCS_BASED
;
22191 /* The EABI says that an inline function may never be the key
22195 arm_cxx_key_method_may_be_inline (void)
22197 return !TARGET_AAPCS_BASED
;
22201 arm_cxx_determine_class_data_visibility (tree decl
)
22203 if (!TARGET_AAPCS_BASED
22204 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
22207 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22208 is exported. However, on systems without dynamic vague linkage,
22209 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22210 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
22211 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
22213 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
22214 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
22218 arm_cxx_class_data_always_comdat (void)
22220 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22221 vague linkage if the class has no key function. */
22222 return !TARGET_AAPCS_BASED
;
22226 /* The EABI says __aeabi_atexit should be used to register static
22230 arm_cxx_use_aeabi_atexit (void)
22232 return TARGET_AAPCS_BASED
;
22237 arm_set_return_address (rtx source
, rtx scratch
)
22239 arm_stack_offsets
*offsets
;
22240 HOST_WIDE_INT delta
;
22242 unsigned long saved_regs
;
22244 offsets
= arm_get_frame_offsets ();
22245 saved_regs
= offsets
->saved_regs_mask
;
22247 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
22248 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
22251 if (frame_pointer_needed
)
22252 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
22255 /* LR will be the first saved register. */
22256 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
22261 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
22262 GEN_INT (delta
& ~4095)));
22267 addr
= stack_pointer_rtx
;
22269 addr
= plus_constant (addr
, delta
);
22271 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
22277 thumb_set_return_address (rtx source
, rtx scratch
)
22279 arm_stack_offsets
*offsets
;
22280 HOST_WIDE_INT delta
;
22281 HOST_WIDE_INT limit
;
22284 unsigned long mask
;
22288 offsets
= arm_get_frame_offsets ();
22289 mask
= offsets
->saved_regs_mask
;
22290 if (mask
& (1 << LR_REGNUM
))
22293 /* Find the saved regs. */
22294 if (frame_pointer_needed
)
22296 delta
= offsets
->soft_frame
- offsets
->saved_args
;
22297 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
22303 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
22306 /* Allow for the stack frame. */
22307 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
22309 /* The link register is always the first saved register. */
22312 /* Construct the address. */
22313 addr
= gen_rtx_REG (SImode
, reg
);
22316 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
22317 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
22321 addr
= plus_constant (addr
, delta
);
22323 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
22326 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
22329 /* Implements target hook vector_mode_supported_p. */
22331 arm_vector_mode_supported_p (enum machine_mode mode
)
22333 /* Neon also supports V2SImode, etc. listed in the clause below. */
22334 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
22335 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
22338 if ((TARGET_NEON
|| TARGET_IWMMXT
)
22339 && ((mode
== V2SImode
)
22340 || (mode
== V4HImode
)
22341 || (mode
== V8QImode
)))
22347 /* Implements target hook array_mode_supported_p. */
22350 arm_array_mode_supported_p (enum machine_mode mode
,
22351 unsigned HOST_WIDE_INT nelems
)
22354 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
22355 && (nelems
>= 2 && nelems
<= 4))
22361 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22362 registers when autovectorizing for Neon, at least until multiple vector
22363 widths are supported properly by the middle-end. */
22365 static enum machine_mode
22366 arm_preferred_simd_mode (enum machine_mode mode
)
22372 return TARGET_NEON_VECTORIZE_QUAD
? V4SFmode
: V2SFmode
;
22374 return TARGET_NEON_VECTORIZE_QUAD
? V4SImode
: V2SImode
;
22376 return TARGET_NEON_VECTORIZE_QUAD
? V8HImode
: V4HImode
;
22378 return TARGET_NEON_VECTORIZE_QUAD
? V16QImode
: V8QImode
;
22380 if (TARGET_NEON_VECTORIZE_QUAD
)
22387 if (TARGET_REALLY_IWMMXT
)
22403 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22405 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22406 using r0-r4 for function arguments, r7 for the stack frame and don't have
22407 enough left over to do doubleword arithmetic. For Thumb-2 all the
22408 potentially problematic instructions accept high registers so this is not
22409 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22410 that require many low registers. */
22412 arm_class_likely_spilled_p (reg_class_t rclass
)
22414 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
22415 || rclass
== CC_REG
)
22421 /* Implements target hook small_register_classes_for_mode_p. */
22423 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
22425 return TARGET_THUMB1
;
22428 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22429 ARM insns and therefore guarantee that the shift count is modulo 256.
22430 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22431 guarantee no particular behavior for out-of-range counts. */
22433 static unsigned HOST_WIDE_INT
22434 arm_shift_truncation_mask (enum machine_mode mode
)
22436 return mode
== SImode
? 255 : 0;
22440 /* Map internal gcc register numbers to DWARF2 register numbers. */
22443 arm_dbx_register_number (unsigned int regno
)
22448 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22449 compatibility. The EABI defines them as registers 96-103. */
22450 if (IS_FPA_REGNUM (regno
))
22451 return (TARGET_AAPCS_BASED
? 96 : 16) + regno
- FIRST_FPA_REGNUM
;
22453 if (IS_VFP_REGNUM (regno
))
22455 /* See comment in arm_dwarf_register_span. */
22456 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22457 return 64 + regno
- FIRST_VFP_REGNUM
;
22459 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
22462 if (IS_IWMMXT_GR_REGNUM (regno
))
22463 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
22465 if (IS_IWMMXT_REGNUM (regno
))
22466 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
22468 gcc_unreachable ();
22471 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22472 GCC models them as 64 32-bit registers, so we need to describe this to
22473 the DWARF generation code. Other registers can use the default. */
22475 arm_dwarf_register_span (rtx rtl
)
22482 regno
= REGNO (rtl
);
22483 if (!IS_VFP_REGNUM (regno
))
22486 /* XXX FIXME: The EABI defines two VFP register ranges:
22487 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22489 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22490 corresponding D register. Until GDB supports this, we shall use the
22491 legacy encodings. We also use these encodings for D0-D15 for
22492 compatibility with older debuggers. */
22493 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22496 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
22497 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
22498 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
22499 for (i
= 0; i
< nregs
; i
++)
22500 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
22505 #if ARM_UNWIND_INFO
22506 /* Emit unwind directives for a store-multiple instruction or stack pointer
22507 push during alignment.
22508 These should only ever be generated by the function prologue code, so
22509 expect them to have a particular form. */
22512 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
22515 HOST_WIDE_INT offset
;
22516 HOST_WIDE_INT nregs
;
22522 e
= XVECEXP (p
, 0, 0);
22523 if (GET_CODE (e
) != SET
)
22526 /* First insn will adjust the stack pointer. */
22527 if (GET_CODE (e
) != SET
22528 || GET_CODE (XEXP (e
, 0)) != REG
22529 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22530 || GET_CODE (XEXP (e
, 1)) != PLUS
)
22533 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
22534 nregs
= XVECLEN (p
, 0) - 1;
22536 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
22539 /* The function prologue may also push pc, but not annotate it as it is
22540 never restored. We turn this into a stack pointer adjustment. */
22541 if (nregs
* 4 == offset
- 4)
22543 fprintf (asm_out_file
, "\t.pad #4\n");
22547 fprintf (asm_out_file
, "\t.save {");
22549 else if (IS_VFP_REGNUM (reg
))
22552 fprintf (asm_out_file
, "\t.vsave {");
22554 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
22556 /* FPA registers are done differently. */
22557 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
22561 /* Unknown register type. */
22564 /* If the stack increment doesn't match the size of the saved registers,
22565 something has gone horribly wrong. */
22566 if (offset
!= nregs
* reg_size
)
22571 /* The remaining insns will describe the stores. */
22572 for (i
= 1; i
<= nregs
; i
++)
22574 /* Expect (set (mem <addr>) (reg)).
22575 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22576 e
= XVECEXP (p
, 0, i
);
22577 if (GET_CODE (e
) != SET
22578 || GET_CODE (XEXP (e
, 0)) != MEM
22579 || GET_CODE (XEXP (e
, 1)) != REG
)
22582 reg
= REGNO (XEXP (e
, 1));
22587 fprintf (asm_out_file
, ", ");
22588 /* We can't use %r for vfp because we need to use the
22589 double precision register names. */
22590 if (IS_VFP_REGNUM (reg
))
22591 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
22593 asm_fprintf (asm_out_file
, "%r", reg
);
22595 #ifdef ENABLE_CHECKING
22596 /* Check that the addresses are consecutive. */
22597 e
= XEXP (XEXP (e
, 0), 0);
22598 if (GET_CODE (e
) == PLUS
)
22600 offset
+= reg_size
;
22601 if (GET_CODE (XEXP (e
, 0)) != REG
22602 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22603 || GET_CODE (XEXP (e
, 1)) != CONST_INT
22604 || offset
!= INTVAL (XEXP (e
, 1)))
22608 || GET_CODE (e
) != REG
22609 || REGNO (e
) != SP_REGNUM
)
22613 fprintf (asm_out_file
, "}\n");
22616 /* Emit unwind directives for a SET. */
22619 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
22627 switch (GET_CODE (e0
))
22630 /* Pushing a single register. */
22631 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
22632 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
22633 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
22636 asm_fprintf (asm_out_file
, "\t.save ");
22637 if (IS_VFP_REGNUM (REGNO (e1
)))
22638 asm_fprintf(asm_out_file
, "{d%d}\n",
22639 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
22641 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
22645 if (REGNO (e0
) == SP_REGNUM
)
22647 /* A stack increment. */
22648 if (GET_CODE (e1
) != PLUS
22649 || GET_CODE (XEXP (e1
, 0)) != REG
22650 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
22651 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
22654 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
22655 -INTVAL (XEXP (e1
, 1)));
22657 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
22659 HOST_WIDE_INT offset
;
22661 if (GET_CODE (e1
) == PLUS
)
22663 if (GET_CODE (XEXP (e1
, 0)) != REG
22664 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
22666 reg
= REGNO (XEXP (e1
, 0));
22667 offset
= INTVAL (XEXP (e1
, 1));
22668 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
22669 HARD_FRAME_POINTER_REGNUM
, reg
,
22672 else if (GET_CODE (e1
) == REG
)
22675 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
22676 HARD_FRAME_POINTER_REGNUM
, reg
);
22681 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
22683 /* Move from sp to reg. */
22684 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
22686 else if (GET_CODE (e1
) == PLUS
22687 && GET_CODE (XEXP (e1
, 0)) == REG
22688 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
22689 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
22691 /* Set reg to offset from sp. */
22692 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
22693 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
22695 else if (GET_CODE (e1
) == UNSPEC
&& XINT (e1
, 1) == UNSPEC_STACK_ALIGN
)
22697 /* Stack pointer save before alignment. */
22699 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22712 /* Emit unwind directives for the given insn. */
22715 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
22719 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
22722 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
22723 && (TREE_NOTHROW (current_function_decl
)
22724 || crtl
->all_throwers_are_sibcalls
))
22727 if (GET_CODE (insn
) == NOTE
|| !RTX_FRAME_RELATED_P (insn
))
22730 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
22732 pat
= XEXP (pat
, 0);
22734 pat
= PATTERN (insn
);
22736 switch (GET_CODE (pat
))
22739 arm_unwind_emit_set (asm_out_file
, pat
);
22743 /* Store multiple. */
22744 arm_unwind_emit_sequence (asm_out_file
, pat
);
22753 /* Output a reference from a function exception table to the type_info
22754 object X. The EABI specifies that the symbol should be relocated by
22755 an R_ARM_TARGET2 relocation. */
22758 arm_output_ttype (rtx x
)
22760 fputs ("\t.word\t", asm_out_file
);
22761 output_addr_const (asm_out_file
, x
);
22762 /* Use special relocations for symbol references. */
22763 if (GET_CODE (x
) != CONST_INT
)
22764 fputs ("(TARGET2)", asm_out_file
);
22765 fputc ('\n', asm_out_file
);
22770 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22773 arm_asm_emit_except_personality (rtx personality
)
22775 fputs ("\t.personality\t", asm_out_file
);
22776 output_addr_const (asm_out_file
, personality
);
22777 fputc ('\n', asm_out_file
);
22780 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22783 arm_asm_init_sections (void)
22785 exception_section
= get_unnamed_section (0, output_section_asm_op
,
22788 #endif /* ARM_UNWIND_INFO */
22790 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22792 static enum unwind_info_type
22793 arm_except_unwind_info (struct gcc_options
*opts
)
22795 /* Honor the --enable-sjlj-exceptions configure switch. */
22796 #ifdef CONFIG_SJLJ_EXCEPTIONS
22797 if (CONFIG_SJLJ_EXCEPTIONS
)
22801 /* If not using ARM EABI unwind tables... */
22802 if (ARM_UNWIND_INFO
)
22804 /* For simplicity elsewhere in this file, indicate that all unwind
22805 info is disabled if we're not emitting unwind tables. */
22806 if (!opts
->x_flag_exceptions
&& !opts
->x_flag_unwind_tables
)
22812 /* ... we use sjlj exceptions for backwards compatibility. */
22817 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22818 stack alignment. */
22821 arm_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
22823 rtx unspec
= SET_SRC (pattern
);
22824 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
22828 case UNSPEC_STACK_ALIGN
:
22829 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22830 put anything on the stack, so hopefully it won't matter.
22831 CFA = SP will be correct after alignment. */
22832 dwarf2out_reg_save_reg (label
, stack_pointer_rtx
,
22833 SET_DEST (pattern
));
22836 gcc_unreachable ();
22841 /* Output unwind directives for the start/end of a function. */
22844 arm_output_fn_unwind (FILE * f
, bool prologue
)
22846 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
22850 fputs ("\t.fnstart\n", f
);
22853 /* If this function will never be unwound, then mark it as such.
22854 The same condition is used in arm_unwind_emit to suppress
22855 the frame annotations. */
22856 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
22857 && (TREE_NOTHROW (current_function_decl
)
22858 || crtl
->all_throwers_are_sibcalls
))
22859 fputs("\t.cantunwind\n", f
);
22861 fputs ("\t.fnend\n", f
);
22866 arm_emit_tls_decoration (FILE *fp
, rtx x
)
22868 enum tls_reloc reloc
;
22871 val
= XVECEXP (x
, 0, 0);
22872 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
22874 output_addr_const (fp
, val
);
22879 fputs ("(tlsgd)", fp
);
22882 fputs ("(tlsldm)", fp
);
22885 fputs ("(tlsldo)", fp
);
22888 fputs ("(gottpoff)", fp
);
22891 fputs ("(tpoff)", fp
);
22894 gcc_unreachable ();
22902 fputs (" + (. - ", fp
);
22903 output_addr_const (fp
, XVECEXP (x
, 0, 2));
22905 output_addr_const (fp
, XVECEXP (x
, 0, 3));
22915 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22918 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
22920 gcc_assert (size
== 4);
22921 fputs ("\t.word\t", file
);
22922 output_addr_const (file
, x
);
22923 fputs ("(tlsldo)", file
);
22926 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22929 arm_output_addr_const_extra (FILE *fp
, rtx x
)
22931 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
22932 return arm_emit_tls_decoration (fp
, x
);
22933 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
22936 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
22938 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
22939 assemble_name_raw (fp
, label
);
22943 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
22945 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
22949 output_addr_const (fp
, XVECEXP (x
, 0, 0));
22953 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
22955 output_addr_const (fp
, XVECEXP (x
, 0, 0));
22959 output_addr_const (fp
, XVECEXP (x
, 0, 1));
22963 else if (GET_CODE (x
) == CONST_VECTOR
)
22964 return arm_emit_vector_const (fp
, x
);
22969 /* Output assembly for a shift instruction.
22970 SET_FLAGS determines how the instruction modifies the condition codes.
22971 0 - Do not set condition codes.
22972 1 - Set condition codes.
22973 2 - Use smallest instruction. */
22975 arm_output_shift(rtx
* operands
, int set_flags
)
22978 static const char flag_chars
[3] = {'?', '.', '!'};
22983 c
= flag_chars
[set_flags
];
22984 if (TARGET_UNIFIED_ASM
)
22986 shift
= shift_op(operands
[3], &val
);
22990 operands
[2] = GEN_INT(val
);
22991 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
22994 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
22997 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
22998 output_asm_insn (pattern
, operands
);
23002 /* Output a Thumb-1 casesi dispatch sequence. */
23004 thumb1_output_casesi (rtx
*operands
)
23006 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
23008 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
23010 switch (GET_MODE(diff_vec
))
23013 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
23014 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23016 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
23017 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23019 return "bl\t%___gnu_thumb1_case_si";
23021 gcc_unreachable ();
23025 /* Output a Thumb-2 casesi instruction. */
23027 thumb2_output_casesi (rtx
*operands
)
23029 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
23031 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
23033 output_asm_insn ("cmp\t%0, %1", operands
);
23034 output_asm_insn ("bhi\t%l3", operands
);
23035 switch (GET_MODE(diff_vec
))
23038 return "tbb\t[%|pc, %0]";
23040 return "tbh\t[%|pc, %0, lsl #1]";
23044 output_asm_insn ("adr\t%4, %l2", operands
);
23045 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
23046 output_asm_insn ("add\t%4, %4, %5", operands
);
23051 output_asm_insn ("adr\t%4, %l2", operands
);
23052 return "ldr\t%|pc, [%4, %0, lsl #2]";
23055 gcc_unreachable ();
23059 /* Most ARM cores are single issue, but some newer ones can dual issue.
23060 The scheduler descriptions rely on this being correct. */
23062 arm_issue_rate (void)
23080 /* A table and a function to perform ARM-specific name mangling for
23081 NEON vector types in order to conform to the AAPCS (see "Procedure
23082 Call Standard for the ARM Architecture", Appendix A). To qualify
23083 for emission with the mangled names defined in that document, a
23084 vector type must not only be of the correct mode but also be
23085 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23088 enum machine_mode mode
;
23089 const char *element_type_name
;
23090 const char *aapcs_name
;
23091 } arm_mangle_map_entry
;
23093 static arm_mangle_map_entry arm_mangle_map
[] = {
23094 /* 64-bit containerized types. */
23095 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
23096 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23097 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
23098 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23099 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
23100 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
23101 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
23102 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23103 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23104 /* 128-bit containerized types. */
23105 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
23106 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23107 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
23108 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23109 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
23110 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
23111 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
23112 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23113 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23114 { VOIDmode
, NULL
, NULL
}
23118 arm_mangle_type (const_tree type
)
23120 arm_mangle_map_entry
*pos
= arm_mangle_map
;
23122 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23123 has to be managled as if it is in the "std" namespace. */
23124 if (TARGET_AAPCS_BASED
23125 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
23127 static bool warned
;
23128 if (!warned
&& warn_psabi
&& !in_system_header
)
23131 inform (input_location
,
23132 "the mangling of %<va_list%> has changed in GCC 4.4");
23134 return "St9__va_list";
23137 /* Half-precision float. */
23138 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
23141 if (TREE_CODE (type
) != VECTOR_TYPE
)
23144 /* Check the mode of the vector type, and the name of the vector
23145 element type, against the table. */
23146 while (pos
->mode
!= VOIDmode
)
23148 tree elt_type
= TREE_TYPE (type
);
23150 if (pos
->mode
== TYPE_MODE (type
)
23151 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
23152 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
23153 pos
->element_type_name
))
23154 return pos
->aapcs_name
;
23159 /* Use the default mangling for unrecognized (possibly user-defined)
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};
23176 /* Adjust register allocation order when compiling for Thumb. */
23179 arm_order_regs_for_local_alloc (void)
23181 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
23182 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
23184 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
23185 sizeof (thumb_core_reg_alloc_order
));
23188 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23191 arm_frame_pointer_required (void)
23193 return (cfun
->has_nonlocal_label
23194 || SUBTARGET_FRAME_POINTER_REQUIRED
23195 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
23198 /* Only thumb1 can't support conditional execution, so return true if
23199 the target is not thumb1. */
23201 arm_have_conditional_execution (void)
23203 return !TARGET_THUMB1
;
23206 /* Legitimize a memory reference for sync primitive implemented using
23207 ldrex / strex. We currently force the form of the reference to be
23208 indirect without offset. We do not yet support the indirect offset
23209 addressing supported by some ARM targets for these
23212 arm_legitimize_sync_memory (rtx memory
)
23214 rtx addr
= force_reg (Pmode
, XEXP (memory
, 0));
23215 rtx legitimate_memory
= gen_rtx_MEM (GET_MODE (memory
), addr
);
23217 set_mem_alias_set (legitimate_memory
, ALIAS_SET_MEMORY_BARRIER
);
23218 MEM_VOLATILE_P (legitimate_memory
) = MEM_VOLATILE_P (memory
);
23219 return legitimate_memory
;
23222 /* An instruction emitter. */
23223 typedef void (* emit_f
) (int label
, const char *, rtx
*);
23225 /* An instruction emitter that emits via the conventional
23226 output_asm_insn. */
23228 arm_emit (int label ATTRIBUTE_UNUSED
, const char *pattern
, rtx
*operands
)
23230 output_asm_insn (pattern
, operands
);
/* Count of emitted synchronization instructions, accumulated by
   arm_count and read back by arm_sync_loop_insns.  */
static unsigned arm_insn_count;
23236 /* An emitter that counts emitted instructions but does not actually
23237 emit instruction into the instruction stream. */
23239 arm_count (int label
,
23240 const char *pattern ATTRIBUTE_UNUSED
,
23241 rtx
*operands ATTRIBUTE_UNUSED
)
23247 /* Construct a pattern using conventional output formatting and feed
23248 it to output_asm_insn. Provides a mechanism to construct the
23249 output pattern on the fly. Note the hard limit on the pattern
23251 static void ATTRIBUTE_PRINTF_4
23252 arm_output_asm_insn (emit_f emit
, int label
, rtx
*operands
,
23253 const char *pattern
, ...)
23258 va_start (ap
, pattern
);
23259 vsprintf (buffer
, pattern
, ap
);
23261 emit (label
, buffer
, operands
);
23264 /* Emit the memory barrier instruction, if any, provided by this
23265 target to a specified emitter. */
23267 arm_process_output_memory_barrier (emit_f emit
, rtx
*operands
)
23269 if (TARGET_HAVE_DMB
)
23271 /* Note we issue a system level barrier. We should consider
23272 issuing a inner shareabilty zone barrier here instead, ie.
23274 emit (0, "dmb\tsy", operands
);
23278 if (TARGET_HAVE_DMB_MCR
)
23280 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands
);
23284 gcc_unreachable ();
23287 /* Emit the memory barrier instruction, if any, provided by this
23290 arm_output_memory_barrier (rtx
*operands
)
23292 arm_process_output_memory_barrier (arm_emit
, operands
);
23296 /* Helper to figure out the instruction suffix required on ldrex/strex
23297 for operations on an object of the specified mode. */
23298 static const char *
23299 arm_ldrex_suffix (enum machine_mode mode
)
23303 case QImode
: return "b";
23304 case HImode
: return "h";
23305 case SImode
: return "";
23306 case DImode
: return "d";
23308 gcc_unreachable ();
23313 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23316 arm_output_ldrex (emit_f emit
,
23317 enum machine_mode mode
,
23321 const char *suffix
= arm_ldrex_suffix (mode
);
23324 operands
[0] = target
;
23325 operands
[1] = memory
;
23326 arm_output_asm_insn (emit
, 0, operands
, "ldrex%s\t%%0, %%C1", suffix
);
23329 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23332 arm_output_strex (emit_f emit
,
23333 enum machine_mode mode
,
23339 const char *suffix
= arm_ldrex_suffix (mode
);
23342 operands
[0] = result
;
23343 operands
[1] = value
;
23344 operands
[2] = memory
;
23345 arm_output_asm_insn (emit
, 0, operands
, "strex%s%s\t%%0, %%1, %%C2", suffix
,
23349 /* Helper to emit a two operand instruction. */
23351 arm_output_op2 (emit_f emit
, const char *mnemonic
, rtx d
, rtx s
)
23357 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1", mnemonic
);
23360 /* Helper to emit a three operand instruction. */
23362 arm_output_op3 (emit_f emit
, const char *mnemonic
, rtx d
, rtx a
, rtx b
)
23369 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1, %%2", mnemonic
);
23372 /* Emit a load store exclusive synchronization loop.
23376 if old_value != required_value
23378 t1 = sync_op (old_value, new_value)
23379 [mem] = t1, t2 = [0|1]
23383 t1 == t2 is not permitted
23384 t1 == old_value is permitted
23388 RTX register or const_int representing the required old_value for
23389 the modify to continue, if NULL no comparsion is performed. */
23391 arm_output_sync_loop (emit_f emit
,
23392 enum machine_mode mode
,
23395 rtx required_value
,
23399 enum attr_sync_op sync_op
,
23400 int early_barrier_required
)
23404 gcc_assert (t1
!= t2
);
23406 if (early_barrier_required
)
23407 arm_process_output_memory_barrier (emit
, NULL
);
23409 arm_output_asm_insn (emit
, 1, operands
, "%sLSYT%%=:", LOCAL_LABEL_PREFIX
);
23411 arm_output_ldrex (emit
, mode
, old_value
, memory
);
23413 if (required_value
)
23417 operands
[0] = old_value
;
23418 operands
[1] = required_value
;
23419 arm_output_asm_insn (emit
, 0, operands
, "cmp\t%%0, %%1");
23420 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX
);
23426 arm_output_op3 (emit
, "add", t1
, old_value
, new_value
);
23430 arm_output_op3 (emit
, "sub", t1
, old_value
, new_value
);
23434 arm_output_op3 (emit
, "orr", t1
, old_value
, new_value
);
23438 arm_output_op3 (emit
, "eor", t1
, old_value
, new_value
);
23442 arm_output_op3 (emit
,"and", t1
, old_value
, new_value
);
23446 arm_output_op3 (emit
, "and", t1
, old_value
, new_value
);
23447 arm_output_op2 (emit
, "mvn", t1
, t1
);
23457 arm_output_strex (emit
, mode
, "", t2
, t1
, memory
);
23459 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
23460 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=",
23461 LOCAL_LABEL_PREFIX
);
23465 /* Use old_value for the return value because for some operations
23466 the old_value can easily be restored. This saves one register. */
23467 arm_output_strex (emit
, mode
, "", old_value
, t1
, memory
);
23468 operands
[0] = old_value
;
23469 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
23470 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=",
23471 LOCAL_LABEL_PREFIX
);
23476 arm_output_op3 (emit
, "sub", old_value
, t1
, new_value
);
23480 arm_output_op3 (emit
, "add", old_value
, t1
, new_value
);
23484 arm_output_op3 (emit
, "eor", old_value
, t1
, new_value
);
23488 arm_output_op2 (emit
, "mov", old_value
, required_value
);
23492 gcc_unreachable ();
23496 arm_process_output_memory_barrier (emit
, NULL
);
23497 arm_output_asm_insn (emit
, 1, operands
, "%sLSYB%%=:", LOCAL_LABEL_PREFIX
);
23501 arm_get_sync_operand (rtx
*operands
, int index
, rtx default_value
)
23504 default_value
= operands
[index
- 1];
23506 return default_value
;
/* Fetch the named sync operand of INSN, falling back to DEFAULT when
   the attribute says the operand is absent.  */
#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23512 /* Extract the operands for a synchroniztion instruction from the
23513 instructions attributes and emit the instruction. */
23515 arm_process_output_sync_insn (emit_f emit
, rtx insn
, rtx
*operands
)
23517 rtx result
, memory
, required_value
, new_value
, t1
, t2
;
23519 enum machine_mode mode
;
23520 enum attr_sync_op sync_op
;
23522 result
= FETCH_SYNC_OPERAND(result
, 0);
23523 memory
= FETCH_SYNC_OPERAND(memory
, 0);
23524 required_value
= FETCH_SYNC_OPERAND(required_value
, 0);
23525 new_value
= FETCH_SYNC_OPERAND(new_value
, 0);
23526 t1
= FETCH_SYNC_OPERAND(t1
, 0);
23527 t2
= FETCH_SYNC_OPERAND(t2
, 0);
23529 get_attr_sync_release_barrier (insn
) == SYNC_RELEASE_BARRIER_YES
;
23530 sync_op
= get_attr_sync_op (insn
);
23531 mode
= GET_MODE (memory
);
23533 arm_output_sync_loop (emit
, mode
, result
, memory
, required_value
,
23534 new_value
, t1
, t2
, sync_op
, early_barrier
);
23537 /* Emit a synchronization instruction loop. */
23539 arm_output_sync_insn (rtx insn
, rtx
*operands
)
23541 arm_process_output_sync_insn (arm_emit
, insn
, operands
);
23545 /* Count the number of machine instruction that will be emitted for a
23546 synchronization instruction. Note that the emitter used does not
23547 emit instructions, it just counts instructions being carefull not
23548 to count labels. */
23550 arm_sync_loop_insns (rtx insn
, rtx
*operands
)
23552 arm_insn_count
= 0;
23553 arm_process_output_sync_insn (arm_count
, insn
, operands
);
23554 return arm_insn_count
;
23557 /* Helper to call a target sync instruction generator, dealing with
23558 the variation in operands required by the different generators. */
23560 arm_call_generator (struct arm_sync_generator
*generator
, rtx old_value
,
23561 rtx memory
, rtx required_value
, rtx new_value
)
23563 switch (generator
->op
)
23565 case arm_sync_generator_omn
:
23566 gcc_assert (! required_value
);
23567 return generator
->u
.omn (old_value
, memory
, new_value
);
23569 case arm_sync_generator_omrn
:
23570 gcc_assert (required_value
);
23571 return generator
->u
.omrn (old_value
, memory
, required_value
, new_value
);
23577 /* Expand a synchronization loop. The synchronization loop is expanded
23578 as an opaque block of instructions in order to ensure that we do
23579 not subsequently get extraneous memory accesses inserted within the
23580 critical region. The exclusive access property of ldrex/strex is
23581 only guaranteed in there are no intervening memory accesses. */
23583 arm_expand_sync (enum machine_mode mode
,
23584 struct arm_sync_generator
*generator
,
23585 rtx target
, rtx memory
, rtx required_value
, rtx new_value
)
23587 if (target
== NULL
)
23588 target
= gen_reg_rtx (mode
);
23590 memory
= arm_legitimize_sync_memory (memory
);
23591 if (mode
!= SImode
)
23593 rtx load_temp
= gen_reg_rtx (SImode
);
23595 if (required_value
)
23596 required_value
= convert_modes (SImode
, mode
, required_value
, true);
23598 new_value
= convert_modes (SImode
, mode
, new_value
, true);
23599 emit_insn (arm_call_generator (generator
, load_temp
, memory
,
23600 required_value
, new_value
));
23601 emit_move_insn (target
, gen_lowpart (mode
, load_temp
));
23605 emit_insn (arm_call_generator (generator
, target
, memory
, required_value
,
23610 static unsigned int
23611 arm_autovectorize_vector_sizes (void)
23613 return TARGET_NEON_VECTORIZE_QUAD
? 16 | 8 : 0;
23617 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
23619 /* Vectors which aren't in packed structures will not be less aligned than
23620 the natural alignment of their element type, so this is safe. */
23621 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
23624 return default_builtin_vector_alignment_reachable (type
, is_packed
);
23628 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
23629 const_tree type
, int misalignment
,
23632 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
23634 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
23639 /* If the misalignment is unknown, we should be able to handle the access
23640 so long as it is not to a member of a packed data structure. */
23641 if (misalignment
== -1)
23644 /* Return true if the misalignment is a multiple of the natural alignment
23645 of the vector's element type. This is probably always going to be
23646 true in practice, since we've already established that this isn't a
23648 return ((misalignment
% align
) == 0);
23651 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
23656 arm_conditional_register_usage (void)
23660 if (TARGET_SOFT_FLOAT
|| TARGET_THUMB1
|| !TARGET_FPA
)
23662 for (regno
= FIRST_FPA_REGNUM
;
23663 regno
<= LAST_FPA_REGNUM
; ++regno
)
23664 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
23667 if (TARGET_THUMB1
&& optimize_size
)
23669 /* When optimizing for size on Thumb-1, it's better not
23670 to use the HI regs, because of the overhead of
23672 for (regno
= FIRST_HI_REGNUM
;
23673 regno
<= LAST_HI_REGNUM
; ++regno
)
23674 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
23677 /* The link register can be clobbered by any branch insn,
23678 but we have no way to track that at present, so mark
23679 it as unavailable. */
23681 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
23683 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
23685 if (TARGET_MAVERICK
)
23687 for (regno
= FIRST_FPA_REGNUM
;
23688 regno
<= LAST_FPA_REGNUM
; ++ regno
)
23689 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
23690 for (regno
= FIRST_CIRRUS_FP_REGNUM
;
23691 regno
<= LAST_CIRRUS_FP_REGNUM
; ++ regno
)
23693 fixed_regs
[regno
] = 0;
23694 call_used_regs
[regno
] = regno
< FIRST_CIRRUS_FP_REGNUM
+ 4;
23699 /* VFPv3 registers are disabled when earlier VFP
23700 versions are selected due to the definition of
23701 LAST_VFP_REGNUM. */
23702 for (regno
= FIRST_VFP_REGNUM
;
23703 regno
<= LAST_VFP_REGNUM
; ++ regno
)
23705 fixed_regs
[regno
] = 0;
23706 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
23707 || regno
>= FIRST_VFP_REGNUM
+ 32;
23712 if (TARGET_REALLY_IWMMXT
)
23714 regno
= FIRST_IWMMXT_GR_REGNUM
;
23715 /* The 2002/10/09 revision of the XScale ABI has wCG0
23716 and wCG1 as call-preserved registers. The 2002/11/21
23717 revision changed this so that all wCG registers are
23718 scratch registers. */
23719 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
23720 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
23721 fixed_regs
[regno
] = 0;
23722 /* The XScale ABI has wR0 - wR9 as scratch registers,
23723 the rest as call-preserved registers. */
23724 for (regno
= FIRST_IWMMXT_REGNUM
;
23725 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
23727 fixed_regs
[regno
] = 0;
23728 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
23732 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
23734 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
23735 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
23737 else if (TARGET_APCS_STACK
)
23739 fixed_regs
[10] = 1;
23740 call_used_regs
[10] = 1;
23742 /* -mcaller-super-interworking reserves r11 for calls to
23743 _interwork_r11_call_via_rN(). Making the register global
23744 is an easy way of ensuring that it remains valid for all
23746 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
23747 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
23749 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
23750 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
23751 if (TARGET_CALLER_INTERWORKING
)
23752 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
23754 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23758 arm_preferred_rename_class (reg_class_t rclass
)
23760 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23761 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
23762 and code size can be reduced. */
23763 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
23769 /* Compute the atrribute "length" of insn "*push_multi".
23770 So this function MUST be kept in sync with that insn pattern. */
23772 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
23774 int i
, regno
, hi_reg
;
23775 int num_saves
= XVECLEN (parallel_op
, 0);
23782 regno
= REGNO (first_op
);
23783 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
23784 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
23786 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
23787 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
23795 #include "gt-arm.h"