1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
9 This file is part of GCC.
11 GCC is free software; you can redistribute it and/or modify it
12 under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 3, or (at your
14 option) any later version.
16 GCC is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
19 License for more details.
21 You should have received a copy of the GNU General Public License
22 along with GCC; see the file COPYING3. If not see
23 <http://www.gnu.org/licenses/>. */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
49 #include "c-family/c-pragma.h" /* ??? */
50 #include "integrate.h"
53 #include "target-def.h"
55 #include "langhooks.h"
61 /* Forward definitions of types. */
62 typedef struct minipool_node Mnode
;	/* A node in the minipool (mid-function constant pool) list.  */
63 typedef struct minipool_fixup Mfix
;	/* A fix-up record: an insn that needs a minipool entry.  */
65 void (*arm_lang_output_object_attributes_hook
)(void);	/* Hook for emitting extra object attributes; presumably
	   installed by language front-end code -- confirm at callers.  */
67 /* Forward function declarations. */
68 static int arm_compute_static_chain_stack_bytes (void);
69 static arm_stack_offsets
*arm_get_frame_offsets (void);
70 static void arm_add_gc_roots (void);
71 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
72 HOST_WIDE_INT
, rtx
, rtx
, int, int);
73 static unsigned bit_count (unsigned long);
74 static int arm_address_register_rtx_p (rtx
, int);
75 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
76 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
77 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
78 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
79 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
80 inline static int thumb1_index_register_rtx_p (rtx
, int);
81 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
82 static int thumb_far_jump_used_p (void);
83 static bool thumb_force_lr_save (void);
84 static int const_ok_for_op (HOST_WIDE_INT
, enum rtx_code
);
85 static rtx
emit_sfm (int, int);
86 static unsigned arm_size_return_regs (void);
87 static bool arm_assemble_integer (rtx
, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx
, int);
89 static void arm_print_operand_address (FILE *, rtx
);
90 static bool arm_print_operand_punct_valid_p (unsigned char code
);
91 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
92 static arm_cc
get_arm_condition_code (rtx
);
93 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
94 static rtx
is_jump_table (rtx
);
95 static const char *output_multi_immediate (rtx
*, const char *, const char *,
97 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
98 static struct machine_function
*arm_init_machine_status (void);
99 static void thumb_exit (FILE *, int);
/* Duplicate forward declaration of is_jump_table removed; it is
   already declared above.  */
101 static HOST_WIDE_INT
get_jump_table_size (rtx
);
102 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
103 static Mnode
*add_minipool_forward_ref (Mfix
*);
104 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
105 static Mnode
*add_minipool_backward_ref (Mfix
*);
106 static void assign_minipool_offsets (Mfix
*);
107 static void arm_print_value (FILE *, rtx
);
108 static void dump_minipool (rtx
);
109 static int arm_barrier_cost (rtx
);
110 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
111 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
112 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
114 static void arm_reorg (void);
115 static bool note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
116 static unsigned long arm_compute_save_reg0_reg12_mask (void);
117 static unsigned long arm_compute_save_reg_mask (void);
118 static unsigned long arm_isr_value (tree
);
119 static unsigned long arm_compute_func_type (void);
120 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
121 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
122 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
123 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
124 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
126 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
127 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
128 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT
);
129 static int arm_comp_type_attributes (const_tree
, const_tree
);
130 static void arm_set_default_type_attributes (tree
);
131 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
132 static int count_insns_for_constant (HOST_WIDE_INT
, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree
, tree
);
135 static enum machine_mode
arm_promote_function_mode (const_tree
,
136 enum machine_mode
, int *,
138 static bool arm_return_in_memory (const_tree
, const_tree
);
139 static rtx
arm_function_value (const_tree
, const_tree
, bool);
140 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
145 static bool arm_have_conditional_execution (void);
146 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
147 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
148 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
149 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
150 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
151 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
152 static bool arm_rtx_costs (rtx
, int, int, int *, bool);
153 static int arm_address_cost (rtx
, bool);
154 static bool arm_memory_load_p (rtx
);
155 static bool arm_cirrus_insn_p (rtx
);
156 static void cirrus_reorg (rtx
);
157 static void arm_init_builtins (void);
158 static void arm_init_iwmmxt_builtins (void);
159 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
160 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
161 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
162 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
163 static void emit_constant_insn (rtx cond
, rtx pattern
);
164 static rtx
emit_set_insn (rtx
, rtx
);
165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
167 static rtx
arm_function_arg (CUMULATIVE_ARGS
*, enum machine_mode
,
169 static void arm_function_arg_advance (CUMULATIVE_ARGS
*, enum machine_mode
,
171 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
173 static int aapcs_select_return_coproc (const_tree
, const_tree
);
175 #ifdef OBJECT_FORMAT_ELF
176 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
177 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
180 static void arm_encode_section_info (tree
, rtx
, int);
183 static void arm_file_end (void);
184 static void arm_file_start (void);
186 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
188 static bool arm_pass_by_reference (CUMULATIVE_ARGS
*,
189 enum machine_mode
, const_tree
, bool);
190 static bool arm_promote_prototypes (const_tree
);
191 static bool arm_default_short_enums (void);
192 static bool arm_align_anon_bitfield (void);
193 static bool arm_return_in_msb (const_tree
);
194 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
195 static bool arm_return_in_memory (const_tree
, const_tree
);
196 #ifdef TARGET_UNWIND_INFO
197 static void arm_unwind_emit (FILE *, rtx
);
198 static bool arm_output_ttype (rtx
);
199 static void arm_asm_emit_except_personality (rtx
);
200 static void arm_asm_init_sections (void);
202 static void arm_dwarf_handle_frame_unspec (const char *, rtx
, int);
203 static rtx
arm_dwarf_register_span (rtx
);
205 static tree
arm_cxx_guard_type (void);
206 static bool arm_cxx_guard_mask_bit (void);
207 static tree
arm_get_cookie_size (tree
);
208 static bool arm_cookie_has_size (void);
209 static bool arm_cxx_cdtor_returns_this (void);
210 static bool arm_cxx_key_method_may_be_inline (void);
211 static void arm_cxx_determine_class_data_visibility (tree
);
212 static bool arm_cxx_class_data_always_comdat (void);
213 static bool arm_cxx_use_aeabi_atexit (void);
214 static void arm_init_libfuncs (void);
215 static tree
arm_build_builtin_va_list (void);
216 static void arm_expand_builtin_va_start (tree
, rtx
);
217 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
218 static void arm_option_override (void);
219 static void arm_option_optimization (int, int);
220 static bool arm_handle_option (size_t, const char *, int);
221 static void arm_target_help (void);
222 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
223 static bool arm_cannot_copy_insn_p (rtx
);
224 static bool arm_tls_symbol_p (rtx x
);
225 static int arm_issue_rate (void);
226 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
227 static bool arm_output_addr_const_extra (FILE *, rtx
);
228 static bool arm_allocate_stack_slots_for_args (void);
229 static const char *arm_invalid_parameter_type (const_tree t
);
230 static const char *arm_invalid_return_type (const_tree t
);
231 static tree
arm_promoted_type (const_tree t
);
232 static tree
arm_convert_to_type (tree type
, tree expr
);
233 static bool arm_scalar_mode_supported_p (enum machine_mode
);
234 static bool arm_frame_pointer_required (void);
235 static bool arm_can_eliminate (const int, const int);
236 static void arm_asm_trampoline_template (FILE *);
237 static void arm_trampoline_init (rtx
, tree
, rtx
);
238 static rtx
arm_trampoline_adjust_address (rtx
);
239 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
240 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
241 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
242 static unsigned int arm_units_per_simd_word (enum machine_mode
);
243 static bool arm_class_likely_spilled_p (reg_class_t
);
246 /* Table of machine attributes. */
247 static const struct attribute_spec arm_attribute_table
[] =
249 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
250 /* Function calls made to this symbol must be done indirectly, because
251 it may lie outside of the 26 bit addressing range of a normal function
253 { "long_call", 0, 0, false, true, true, NULL
},
254 /* Whereas these functions are always known to reside within the 26 bit
256 { "short_call", 0, 0, false, true, true, NULL
},
257 /* Specify the procedure call conventions for a function. */
258 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
},
259 /* Interrupt Service Routines have special prologue and epilogue requirements. */
260 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
},
261 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
},
262 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
},
264 /* ARM/PE has three new attributes:
266 dllexport - for exporting a function/variable that will live in a dll
267 dllimport - for importing a function/variable from a dll
269 Microsoft allows multiple declspecs in one __declspec, separating
270 them with spaces. We do NOT support this. Instead, use __declspec
273 { "dllimport", 0, 0, true, false, false, NULL
},
274 { "dllexport", 0, 0, true, false, false, NULL
},
275 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
},
276 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
277 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
278 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
279 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
},
281 { NULL
, 0, 0, false, false, false, NULL
}
284 /* Initialize the GCC target structure. */
285 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
286 #undef TARGET_MERGE_DECL_ATTRIBUTES
287 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
290 #undef TARGET_LEGITIMIZE_ADDRESS
291 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
293 #undef TARGET_ATTRIBUTE_TABLE
294 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
296 #undef TARGET_ASM_FILE_START
297 #define TARGET_ASM_FILE_START arm_file_start
298 #undef TARGET_ASM_FILE_END
299 #define TARGET_ASM_FILE_END arm_file_end
301 #undef TARGET_ASM_ALIGNED_SI_OP
302 #define TARGET_ASM_ALIGNED_SI_OP NULL
303 #undef TARGET_ASM_INTEGER
304 #define TARGET_ASM_INTEGER arm_assemble_integer
306 #undef TARGET_PRINT_OPERAND
307 #define TARGET_PRINT_OPERAND arm_print_operand
308 #undef TARGET_PRINT_OPERAND_ADDRESS
309 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
310 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
311 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
313 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
314 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
316 #undef TARGET_ASM_FUNCTION_PROLOGUE
317 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
319 #undef TARGET_ASM_FUNCTION_EPILOGUE
320 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
322 #undef TARGET_DEFAULT_TARGET_FLAGS
323 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
324 #undef TARGET_HANDLE_OPTION
325 #define TARGET_HANDLE_OPTION arm_handle_option
327 #define TARGET_HELP arm_target_help
328 #undef TARGET_OPTION_OVERRIDE
329 #define TARGET_OPTION_OVERRIDE arm_option_override
330 #undef TARGET_OPTION_OPTIMIZATION
331 #define TARGET_OPTION_OPTIMIZATION arm_option_optimization
333 #undef TARGET_COMP_TYPE_ATTRIBUTES
334 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
336 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
337 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
339 #undef TARGET_SCHED_ADJUST_COST
340 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
342 #undef TARGET_ENCODE_SECTION_INFO
344 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
346 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
349 #undef TARGET_STRIP_NAME_ENCODING
350 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
352 #undef TARGET_ASM_INTERNAL_LABEL
353 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
355 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
356 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
358 #undef TARGET_FUNCTION_VALUE
359 #define TARGET_FUNCTION_VALUE arm_function_value
361 #undef TARGET_LIBCALL_VALUE
362 #define TARGET_LIBCALL_VALUE arm_libcall_value
364 #undef TARGET_ASM_OUTPUT_MI_THUNK
365 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
366 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
367 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
369 #undef TARGET_RTX_COSTS
370 #define TARGET_RTX_COSTS arm_rtx_costs
371 #undef TARGET_ADDRESS_COST
372 #define TARGET_ADDRESS_COST arm_address_cost
374 #undef TARGET_SHIFT_TRUNCATION_MASK
375 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
376 #undef TARGET_VECTOR_MODE_SUPPORTED_P
377 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
378 #undef TARGET_VECTORIZE_UNITS_PER_SIMD_WORD
379 #define TARGET_VECTORIZE_UNITS_PER_SIMD_WORD arm_units_per_simd_word
381 #undef TARGET_MACHINE_DEPENDENT_REORG
382 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
384 #undef TARGET_INIT_BUILTINS
385 #define TARGET_INIT_BUILTINS arm_init_builtins
386 #undef TARGET_EXPAND_BUILTIN
387 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
389 #undef TARGET_INIT_LIBFUNCS
390 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
392 #undef TARGET_PROMOTE_FUNCTION_MODE
393 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
394 #undef TARGET_PROMOTE_PROTOTYPES
395 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
396 #undef TARGET_PASS_BY_REFERENCE
397 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
398 #undef TARGET_ARG_PARTIAL_BYTES
399 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
400 #undef TARGET_FUNCTION_ARG
401 #define TARGET_FUNCTION_ARG arm_function_arg
402 #undef TARGET_FUNCTION_ARG_ADVANCE
403 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
405 #undef TARGET_SETUP_INCOMING_VARARGS
406 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
408 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
409 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
411 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
412 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
413 #undef TARGET_TRAMPOLINE_INIT
414 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
415 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
416 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
418 #undef TARGET_DEFAULT_SHORT_ENUMS
419 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
421 #undef TARGET_ALIGN_ANON_BITFIELD
422 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
424 #undef TARGET_NARROW_VOLATILE_BITFIELD
425 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
427 #undef TARGET_CXX_GUARD_TYPE
428 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
430 #undef TARGET_CXX_GUARD_MASK_BIT
431 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
433 #undef TARGET_CXX_GET_COOKIE_SIZE
434 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
436 #undef TARGET_CXX_COOKIE_HAS_SIZE
437 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
439 #undef TARGET_CXX_CDTOR_RETURNS_THIS
440 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
442 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
443 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
445 #undef TARGET_CXX_USE_AEABI_ATEXIT
446 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
448 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
449 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
450 arm_cxx_determine_class_data_visibility
452 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
453 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
455 #undef TARGET_RETURN_IN_MSB
456 #define TARGET_RETURN_IN_MSB arm_return_in_msb
458 #undef TARGET_RETURN_IN_MEMORY
459 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
461 #undef TARGET_MUST_PASS_IN_STACK
462 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
464 #ifdef TARGET_UNWIND_INFO
465 #undef TARGET_ASM_UNWIND_EMIT
466 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
468 /* EABI unwinding tables use a different format for the typeinfo tables. */
469 #undef TARGET_ASM_TTYPE
470 #define TARGET_ASM_TTYPE arm_output_ttype
472 #undef TARGET_ARM_EABI_UNWINDER
473 #define TARGET_ARM_EABI_UNWINDER true
475 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
476 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
478 #undef TARGET_ASM_INIT_SECTIONS
479 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
480 #endif /* TARGET_UNWIND_INFO */
482 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
483 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
485 #undef TARGET_DWARF_REGISTER_SPAN
486 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
488 #undef TARGET_CANNOT_COPY_INSN_P
489 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
492 #undef TARGET_HAVE_TLS
493 #define TARGET_HAVE_TLS true
496 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
497 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
499 #undef TARGET_CANNOT_FORCE_CONST_MEM
500 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
502 #undef TARGET_MAX_ANCHOR_OFFSET
503 #define TARGET_MAX_ANCHOR_OFFSET 4095
505 /* The minimum is set such that the total size of the block
506 for a particular anchor is -4088 + 1 + 4095 bytes, which is
507 divisible by eight, ensuring natural spacing of anchors. */
508 #undef TARGET_MIN_ANCHOR_OFFSET
509 #define TARGET_MIN_ANCHOR_OFFSET -4088
511 #undef TARGET_SCHED_ISSUE_RATE
512 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
514 #undef TARGET_MANGLE_TYPE
515 #define TARGET_MANGLE_TYPE arm_mangle_type
517 #undef TARGET_BUILD_BUILTIN_VA_LIST
518 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
519 #undef TARGET_EXPAND_BUILTIN_VA_START
520 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
521 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
522 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
525 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
526 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
529 #undef TARGET_LEGITIMATE_ADDRESS_P
530 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
532 #undef TARGET_INVALID_PARAMETER_TYPE
533 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
535 #undef TARGET_INVALID_RETURN_TYPE
536 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
538 #undef TARGET_PROMOTED_TYPE
539 #define TARGET_PROMOTED_TYPE arm_promoted_type
541 #undef TARGET_CONVERT_TO_TYPE
542 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
544 #undef TARGET_SCALAR_MODE_SUPPORTED_P
545 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
547 #undef TARGET_FRAME_POINTER_REQUIRED
548 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
550 #undef TARGET_CAN_ELIMINATE
551 #define TARGET_CAN_ELIMINATE arm_can_eliminate
553 #undef TARGET_CLASS_LIKELY_SPILLED_P
554 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
/* The global target hook vector for this backend, built from the
   TARGET_* macro definitions above via TARGET_INITIALIZER.  */
556 struct gcc_target targetm
= TARGET_INITIALIZER
;
558 /* Obstack for minipool constant handling. */
559 static struct obstack minipool_obstack
;
560 static char * minipool_startobj
;	/* First object on minipool_obstack; presumably the point the
	   obstack is freed back to -- TODO confirm against its uses.  */
562 /* The maximum number of insns skipped which
563 will be conditionalised if possible. */
564 static int max_insns_skipped
= 5;
566 extern FILE * asm_out_file
;	/* The assembler output stream (defined elsewhere).  */
568 /* True if we are currently building a constant table. */
569 int making_const_table
;
571 /* The processor for which instructions should be scheduled. */
572 enum processor_type arm_tune
= arm_none
;
574 /* The current tuning set. */
575 const struct tune_params
*current_tune
;
577 /* Which floating point hardware to schedule for. */
580 /* Which floating point hardware to use. */
581 const struct arm_fpu_desc
*arm_fpu_desc
;
583 /* Whether to use floating point hardware. */
584 enum float_abi_type arm_float_abi
;
586 /* Which __fp16 format to use. */
587 enum arm_fp16_format_type arm_fp16_format
;
589 /* Which ABI to use. */
590 enum arm_abi_type arm_abi
;
592 /* Which thread pointer model to use. */
593 enum arm_tp_type target_thread_pointer
= TP_AUTO
;
595 /* Used to parse -mstructure_size_boundary command line option. */
596 int arm_structure_size_boundary
= DEFAULT_STRUCTURE_SIZE_BOUNDARY
;
598 /* Used for Thumb call_via trampolines. */
599 rtx thumb_call_via_label
[14];
600 static int thumb_call_reg_needed
;
602 /* Bit values used to identify processor capabilities. */
603 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
604 #define FL_ARCH3M (1 << 1) /* Extended multiply */
605 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
606 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
607 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
608 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
609 #define FL_THUMB (1 << 6) /* Thumb aware */
610 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
611 #define FL_STRONG (1 << 8) /* StrongARM */
612 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
613 #define FL_XSCALE (1 << 10) /* XScale */
614 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
615 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
616 media instructions. */
617 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
618 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
619 Note: ARM6 & 7 derivatives only. */
620 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
621 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
622 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
624 #define FL_DIV (1 << 18) /* Hardware divide. */
625 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
626 #define FL_NEON (1 << 20) /* Neon instructions. */
627 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
629 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
631 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
633 /* Flags that only affect tuning, not available instructions. */
634 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
/* Cumulative per-architecture capability masks: each FL_FOR_ARCHx
   builds on an earlier architecture's mask, adding (or, for the
   M-profile variants, removing) the features that revision changes.  */
637 #define FL_FOR_ARCH2 FL_NOTM
638 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
639 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
640 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
641 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
642 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
643 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
644 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
645 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
646 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
647 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
648 #define FL_FOR_ARCH6J FL_FOR_ARCH6
649 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
650 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
651 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
652 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
653 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
654 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
655 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
656 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
657 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
658 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
660 /* The bits in this mask specify which
661 instructions we are allowed to generate. */
662 static unsigned long insn_flags
= 0;
664 /* The bits in this mask specify which instruction scheduling options should
666 static unsigned long tune_flags
= 0;
668 /* The following are used in the arm.md file as equivalents to bits
669 in the above two flag variables. */
671 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
674 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
677 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
680 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
683 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
686 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
689 /* Nonzero if this chip supports the ARM 6K extensions. */
692 /* Nonzero if this chip supports the ARM 7 extensions. */
695 /* Nonzero if instructions not present in the 'M' profile can be used. */
696 int arm_arch_notm
= 0;
698 /* Nonzero if instructions present in ARMv7E-M can be used. */
701 /* Nonzero if this chip can benefit from load scheduling. */
702 int arm_ld_sched
= 0;
704 /* Nonzero if this chip is a StrongARM. */
705 int arm_tune_strongarm
= 0;
707 /* Nonzero if this chip is a Cirrus variant. */
708 int arm_arch_cirrus
= 0;
710 /* Nonzero if this chip supports Intel Wireless MMX technology. */
711 int arm_arch_iwmmxt
= 0;
713 /* Nonzero if this chip is an XScale. */
714 int arm_arch_xscale
= 0;
716 /* Nonzero if tuning for XScale */
717 int arm_tune_xscale
= 0;
719 /* Nonzero if we want to tune for stores that access the write-buffer.
720 This typically means an ARM6 or ARM7 with MMU or MPU. */
721 int arm_tune_wbuf
= 0;
723 /* Nonzero if tuning for Cortex-A9. */
724 int arm_tune_cortex_a9
= 0;
726 /* Nonzero if generating Thumb instructions. */
729 /* Nonzero if generating Thumb-1 instructions. */
732 /* Nonzero if we should define __THUMB_INTERWORK__ in the
734 XXX This is a bit of a hack, it's intended to help work around
735 problems in GLD which doesn't understand that armv5t code is
736 interworking clean. */
737 int arm_cpp_interwork
= 0;
739 /* Nonzero if chip supports Thumb 2. */
742 /* Nonzero if chip supports integer division instruction. */
745 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
746 we must report the mode of the memory reference from
747 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
748 enum machine_mode output_memory_reference_mode
;
750 /* The register number to be used for the PIC offset register. */
751 unsigned arm_pic_register
= INVALID_REGNUM
;
753 /* Set to 1 after arm_reorg has started. Reset to start at the start of
754 the next function. */
755 static int after_arm_reorg
= 0;
757 enum arm_pcs arm_pcs_default
;	/* The default procedure calling standard in effect.  */
759 /* For an explanation of these variables, see final_prescan_insn below. */
761 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
762 enum arm_cond_code arm_current_cc
;
765 int arm_target_label
;	/* See final_prescan_insn below.  */
766 /* The number of conditionally executed insns, including the current insn. */
767 int arm_condexec_count
= 0;
768 /* A bitmask specifying the patterns for the IT block.
769 Zero means do not output an IT block before this insn. */
770 int arm_condexec_mask
= 0;
771 /* The number of bits used in arm_condexec_mask. */
772 int arm_condexec_masklen
= 0;
774 /* The condition codes of the ARM, and the inverse function. */
775 static const char * const arm_condition_codes
[] =
777 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
778 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
781 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
782 int arm_regs_in_sequence
[] =
784 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
787 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
788 #define streq(string1, string2) (strcmp (string1, string2) == 0)
790 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
791 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
792 | (1 << PIC_OFFSET_TABLE_REGNUM)))
794 /* Initialization code. */
798 const char *const name
;
799 enum processor_type core
;
801 const unsigned long flags
;
802 const struct tune_params
*const tune
;
805 const struct tune_params arm_slowmul_tune
=
807 arm_slowmul_rtx_costs
,
812 const struct tune_params arm_fastmul_tune
=
814 arm_fastmul_rtx_costs
,
819 const struct tune_params arm_xscale_tune
=
821 arm_xscale_rtx_costs
,
822 xscale_sched_adjust_cost
,
826 const struct tune_params arm_9e_tune
=
833 const struct tune_params arm_cortex_a9_tune
=
836 cortex_a9_sched_adjust_cost
,
841 /* Not all of these give usefully different compilation alternatives,
842 but there is no simple way of generalizing them. */
843 static const struct processors all_cores
[] =
846 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
847 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
848 #include "arm-cores.def"
850 {NULL
, arm_none
, NULL
, 0, NULL
}
/* Table of known architectures (-march= values), terminated by a
   NULL-name sentinel.  Fields mirror struct processors: name, a
   representative default CPU, the __ARM_ARCH_*__ suffix string, the
   capability flags, and a NULL tune pointer (tuning is derived from
   the chosen CPU instead).
   NOTE(review): the opening `{` of the initializer list appears to have
   been dropped by extraction.  */
853 static const struct processors all_architectures
[] =
855 /* ARM Architectures */
856 /* We don't specify tuning costs here as it will be figured out
859 {"armv2", arm2
, "2", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH2
, NULL
},
860 {"armv2a", arm2
, "2", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH2
, NULL
},
861 {"armv3", arm6
, "3", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH3
, NULL
},
862 {"armv3m", arm7m
, "3M", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH3M
, NULL
},
863 {"armv4", arm7tdmi
, "4", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH4
, NULL
},
864 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
865 implementations that support it, so we will leave it out for now. */
866 {"armv4t", arm7tdmi
, "4T", FL_CO_PROC
| FL_FOR_ARCH4T
, NULL
},
867 {"armv5", arm10tdmi
, "5", FL_CO_PROC
| FL_FOR_ARCH5
, NULL
},
868 {"armv5t", arm10tdmi
, "5T", FL_CO_PROC
| FL_FOR_ARCH5T
, NULL
},
869 {"armv5e", arm1026ejs
, "5E", FL_CO_PROC
| FL_FOR_ARCH5E
, NULL
},
870 {"armv5te", arm1026ejs
, "5TE", FL_CO_PROC
| FL_FOR_ARCH5TE
, NULL
},
871 {"armv6", arm1136js
, "6", FL_CO_PROC
| FL_FOR_ARCH6
, NULL
},
872 {"armv6j", arm1136js
, "6J", FL_CO_PROC
| FL_FOR_ARCH6J
, NULL
},
873 {"armv6k", mpcore
, "6K", FL_CO_PROC
| FL_FOR_ARCH6K
, NULL
},
874 {"armv6z", arm1176jzs
, "6Z", FL_CO_PROC
| FL_FOR_ARCH6Z
, NULL
},
875 {"armv6zk", arm1176jzs
, "6ZK", FL_CO_PROC
| FL_FOR_ARCH6ZK
, NULL
},
876 {"armv6t2", arm1156t2s
, "6T2", FL_CO_PROC
| FL_FOR_ARCH6T2
, NULL
},
/* M-profile architectures carry no FL_CO_PROC (no coprocessor insns).  */
877 {"armv6-m", cortexm1
, "6M", FL_FOR_ARCH6M
, NULL
},
878 {"armv7", cortexa8
, "7", FL_CO_PROC
| FL_FOR_ARCH7
, NULL
},
879 {"armv7-a", cortexa8
, "7A", FL_CO_PROC
| FL_FOR_ARCH7A
, NULL
},
880 {"armv7-r", cortexr4
, "7R", FL_CO_PROC
| FL_FOR_ARCH7R
, NULL
},
881 {"armv7-m", cortexm3
, "7M", FL_CO_PROC
| FL_FOR_ARCH7M
, NULL
},
882 {"armv7e-m", cortexm4
, "7EM", FL_CO_PROC
| FL_FOR_ARCH7EM
, NULL
},
/* Vendor pseudo-architectures: Cirrus Maverick and Intel iWMMXt.  */
883 {"ep9312", ep9312
, "4T", FL_LDSCHED
| FL_CIRRUS
| FL_FOR_ARCH4
, NULL
},
884 {"iwmmxt", iwmmxt
, "5TE", FL_LDSCHED
| FL_STRONG
| FL_FOR_ARCH5TE
| FL_XSCALE
| FL_IWMMXT
, NULL
},
885 {"iwmmxt2", iwmmxt2
, "5TE", FL_LDSCHED
| FL_STRONG
| FL_FOR_ARCH5TE
| FL_XSCALE
| FL_IWMMXT
, NULL
},
/* Sentinel terminator — loops over this table stop at name == NULL.  */
886 {NULL
, arm_none
, NULL
, 0 , NULL
}
890 /* These are populated as commandline arguments are processed, or NULL
/* Entries in all_cores / all_architectures chosen by -march=, -mcpu=
   and -mtune= respectively; resolved/defaulted in arm_option_override.  */
892 static const struct processors
*arm_selected_arch
;
893 static const struct processors
*arm_selected_cpu
;
894 static const struct processors
*arm_selected_tune
;
896 /* The name of the preprocessor macro to define for this architecture. */
/* Overwritten by sprintf in arm_option_override; the initializer only
   reserves enough space for the longest "__ARM_ARCH_*__" name.  */
898 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
900 /* Available values for -mfpu=. */
/* Fields per entry (per struct arm_fpu_desc): name, FP model, revision,
   VFP register layout, has-NEON, has-FP16.
   NOTE(review): the initializer's opening `{` and trailing `};` are
   missing from this extraction — verify against upstream arm.c.  */
902 static const struct arm_fpu_desc all_fpus
[] =
904 {"fpa", ARM_FP_MODEL_FPA
, 0, VFP_NONE
, false, false},
905 {"fpe2", ARM_FP_MODEL_FPA
, 2, VFP_NONE
, false, false},
906 {"fpe3", ARM_FP_MODEL_FPA
, 3, VFP_NONE
, false, false},
907 {"maverick", ARM_FP_MODEL_MAVERICK
, 0, VFP_NONE
, false, false},
908 {"vfp", ARM_FP_MODEL_VFP
, 2, VFP_REG_D16
, false, false},
909 {"vfpv3", ARM_FP_MODEL_VFP
, 3, VFP_REG_D32
, false, false},
910 {"vfpv3-fp16", ARM_FP_MODEL_VFP
, 3, VFP_REG_D32
, false, true},
911 {"vfpv3-d16", ARM_FP_MODEL_VFP
, 3, VFP_REG_D16
, false, false},
912 {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP
, 3, VFP_REG_D16
, false, true},
913 {"vfpv3xd", ARM_FP_MODEL_VFP
, 3, VFP_REG_SINGLE
, false, false},
914 {"vfpv3xd-fp16", ARM_FP_MODEL_VFP
, 3, VFP_REG_SINGLE
, false, true},
915 {"neon", ARM_FP_MODEL_VFP
, 3, VFP_REG_D32
, true , false},
916 {"neon-fp16", ARM_FP_MODEL_VFP
, 3, VFP_REG_D32
, true , true },
917 {"vfpv4", ARM_FP_MODEL_VFP
, 4, VFP_REG_D32
, false, true},
918 {"vfpv4-d16", ARM_FP_MODEL_VFP
, 4, VFP_REG_D16
, false, true},
919 {"fpv4-sp-d16", ARM_FP_MODEL_VFP
, 4, VFP_REG_SINGLE
, false, true},
920 {"neon-vfpv4", ARM_FP_MODEL_VFP
, 4, VFP_REG_D32
, true, true},
921 /* Compatibility aliases. */
922 {"vfp3", ARM_FP_MODEL_VFP
, 3, VFP_REG_D32
, false, false},
/* NOTE(review): each small struct definition in this section lost its
   `struct <tag> {` header and the `const char *name;` field to
   extraction; only the second field survives — verify upstream.  */
929 enum float_abi_type abi_type
;
933 /* Available values for -mfloat-abi=. */
935 static const struct float_abi all_float_abis
[] =
937 {"soft", ARM_FLOAT_ABI_SOFT
},
938 {"softfp", ARM_FLOAT_ABI_SOFTFP
},
939 {"hard", ARM_FLOAT_ABI_HARD
}
946 enum arm_fp16_format_type fp16_format_type
;
950 /* Available values for -mfp16-format=. */
952 static const struct fp16_format all_fp16_formats
[] =
954 {"none", ARM_FP16_FORMAT_NONE
},
955 {"ieee", ARM_FP16_FORMAT_IEEE
},
956 {"alternative", ARM_FP16_FORMAT_ALTERNATIVE
}
963 enum arm_abi_type abi_type
;
967 /* Available values for -mabi=. */
969 static const struct abi_name arm_all_abis
[] =
971 {"apcs-gnu", ARM_ABI_APCS
},
972 {"atpcs", ARM_ABI_ATPCS
},
973 {"aapcs", ARM_ABI_AAPCS
},
974 {"iwmmxt", ARM_ABI_IWMMXT
},
975 {"aapcs-linux", ARM_ABI_AAPCS_LINUX
}
978 /* Supported TLS relocations. */
988 /* The maximum number of insns to be used when loading a constant. */
/* Returns 1 when optimizing for size, else the current tuning's limit.
   NOTE(review): return type and braces lost to extraction.  */
990 arm_constant_limit (bool size_p
)
992 return size_p
? 1 : current_tune
->constant_limit
;
995 /* Emit an insn that's a simple single-set. Both the operands must be known
/* Wraps emit_insn (gen_rtx_SET ...) for a plain (set x y).  */
998 emit_set_insn (rtx x
, rtx y
)
1000 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
1003 /* Return the number of bits set in VALUE. */
/* Kernighan popcount: each iteration clears one set bit.
   NOTE(review): the surrounding while-loop and return statement are
   missing from this view — verify against upstream arm.c.  */
1005 bit_count (unsigned long value
)
1007 unsigned long count
= 0;
1012 value
&= value
- 1; /* Clear the least-significant set bit. */
1018 /* Set up library functions unique to ARM. */
/* Registers the __aeabi_* RTABI helper names for soft-float arithmetic,
   comparisons, conversions and 64-bit integer ops, replacing the libgcc
   defaults.  NOTE(review): return type, braces and the early-return for
   non-EABI targets are missing from this extraction.  */
1021 arm_init_libfuncs (void)
1023 /* There are no special library functions unless we are using the
1028 /* The functions below are described in Section 4 of the "Run-Time
1029 ABI for the ARM architecture", Version 1.0. */
1031 /* Double-precision floating-point arithmetic. Table 2. */
1032 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1033 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1034 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1035 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1036 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1038 /* Double-precision comparisons. Table 3. */
1039 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
/* NULL: NE is derived from EQ rather than having its own helper.  */
1040 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1041 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1042 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1043 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1044 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1045 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1047 /* Single-precision floating-point arithmetic. Table 4. */
1048 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1049 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1050 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1051 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1052 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1054 /* Single-precision comparisons. Table 5. */
1055 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1056 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1057 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1058 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1059 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1060 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1061 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1063 /* Floating-point to integer conversions. Table 6. */
1064 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1065 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1066 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1067 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1068 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1069 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1070 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1071 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1073 /* Conversions between floating types. Table 7. */
1074 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1075 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1077 /* Integer to floating-point conversions. Table 8. */
1078 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1079 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1080 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1081 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1082 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1083 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1084 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1085 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1087 /* Long long. Table 9. */
1088 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1089 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1090 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1091 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1092 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1093 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1094 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1095 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1097 /* Integer (32/32->32) division. \S 4.3.1. */
1098 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1099 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1101 /* The divmod functions are designed so that they can be used for
1102 plain division, even though they return both the quotient and the
1103 remainder. The quotient is returned in the usual location (i.e.,
1104 r0 for SImode, {r0, r1} for DImode), just as would be expected
1105 for an ordinary division routine. Because the AAPCS calling
1106 conventions specify that all of { r0, r1, r2, r3 } are
1107 callee-clobbered registers, there is no need to tell the compiler
1108 explicitly that those registers are clobbered by these
1110 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1111 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1113 /* For SImode division the ABI provides div-without-mod routines,
1114 which are faster. */
1115 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1116 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1118 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1119 divmod libcalls instead. */
1120 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1121 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1122 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1123 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1125 /* Half-precision float operations. The compiler handles all operations
1126 with NULL libfuncs by converting the SFmode. */
1127 switch (arm_fp16_format
)
1129 case ARM_FP16_FORMAT_IEEE
:
1130 case ARM_FP16_FORMAT_ALTERNATIVE
:
/* HFmode<->SFmode conversions pick the helper matching the chosen
   fp16 format; the "__gnu_f2h_ieee"/"__gnu_h2f_ieee" true-branch
   strings appear to have been dropped by extraction.  */
1133 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1134 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1136 : "__gnu_f2h_alternative"));
1137 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1138 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1140 : "__gnu_h2f_alternative"));
/* All direct HFmode arithmetic/comparisons are NULLed so the compiler
   widens to SFmode instead of calling a (nonexistent) helper.  */
1143 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1144 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1145 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1146 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1147 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1150 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1151 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1152 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1153 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1154 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1155 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1156 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1163 if (TARGET_AAPCS_BASED
)
1164 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1167 /* On AAPCS systems, this is the "struct __va_list". */
1168 static GTY(()) tree va_list_type
;
1170 /* Return the type to use as __builtin_va_list. */
/* On non-AAPCS targets falls back to the generic void* va_list; on
   AAPCS builds the ABI-mandated `struct __va_list { void *__ap; }`.
   NOTE(review): return type, braces and the local declarations of
   va_list_name / ap_field are missing from this extraction.  */
1172 arm_build_builtin_va_list (void)
1177 if (!TARGET_AAPCS_BASED
)
1178 return std_build_builtin_va_list ();
1180 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1188 The C Library ABI further reinforces this definition in \S
1191 We must follow this definition exactly. The structure tag
1192 name is visible in C++ mangled names, and thus forms a part
1193 of the ABI. The field name may be used by people who
1194 #include <stdarg.h>. */
1195 /* Create the type. */
1196 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1197 /* Give it the required name. */
1198 va_list_name
= build_decl (BUILTINS_LOCATION
,
1200 get_identifier ("__va_list"),
1202 DECL_ARTIFICIAL (va_list_name
) = 1;
1203 TYPE_NAME (va_list_type
) = va_list_name
;
1204 /* Create the __ap field. */
1205 ap_field
= build_decl (BUILTINS_LOCATION
,
1207 get_identifier ("__ap"),
1209 DECL_ARTIFICIAL (ap_field
) = 1;
1210 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1211 TYPE_FIELDS (va_list_type
) = ap_field
;
1212 /* Compute its layout. */
1213 layout_type (va_list_type
);
1215 return va_list_type
;
1218 /* Return an expression of type "void *" pointing to the next
1219 available argument in a variable-argument list. VALIST is the
1220 user-level va_list object, of type __builtin_va_list. */
/* NOTE(review): return type, braces and the final `return valist;`
   are outside this extraction's view.  */
1222 arm_extract_valist_ptr (tree valist
)
1224 if (TREE_TYPE (valist
) == error_mark_node
)
1225 return error_mark_node
;
1227 /* On an AAPCS target, the pointer is stored within "struct
1229 if (TARGET_AAPCS_BASED
)
/* Unwrap the single __ap field of struct __va_list.  */
1231 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1232 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1233 valist
, ap_field
, NULL_TREE
);
1239 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
/* Unwraps the AAPCS struct __va_list, then defers to the generic
   va_start expansion.  */
1241 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1243 valist
= arm_extract_valist_ptr (valist
);
1244 std_expand_builtin_va_start (valist
, nextarg
);
1247 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
/* Same unwrapping, then the generic va_arg gimplification.
   NOTE(review): the post_p parameter line was dropped by extraction.  */
1249 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1252 valist
= arm_extract_valist_ptr (valist
);
1253 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1256 /* Lookup NAME in SEL. */
/* Linear search of a NULL-terminated processors table; reports a bad
   value via error() using DESC as the option name.  NOTE(review): the
   `return sel;` on match and trailing `return NULL;` are missing from
   this extraction.  */
1258 static const struct processors
*
1259 arm_find_cpu (const char *name
, const struct processors
*sel
, const char *desc
)
1261 if (!(name
&& *name
))
1264 for (; sel
->name
!= NULL
; sel
++)
1266 if (streq (name
, sel
->name
))
1270 error ("bad value (%s) for %s switch", name
, desc
);
1274 /* Implement TARGET_HANDLE_OPTION. */
/* Records -march/-mcpu/-mtune selections and the float-abi shortcuts;
   resolution happens later in arm_option_override.  NOTE(review): the
   switch statement, OPT_* case labels for march/mcpu/mtune, break
   statements and return value were dropped by extraction.  */
1277 arm_handle_option (size_t code
, const char *arg
, int value ATTRIBUTE_UNUSED
)
1282 arm_selected_arch
= arm_find_cpu(arg
, all_architectures
, "-march");
1286 arm_selected_cpu
= arm_find_cpu(arg
, all_cores
, "-mcpu");
1289 case OPT_mhard_float
:
1290 target_float_abi_name
= "hard";
1293 case OPT_msoft_float
:
1294 target_float_abi_name
= "soft";
1298 arm_selected_tune
= arm_find_cpu(arg
, all_cores
, "-mtune");
/* Prints the known CPU and architecture names for --target-help,
   word-wrapping at the terminal width (COLUMNS env var, else a
   default).  NOTE(review): return type, braces, the loops' headers and
   several locals (p, i, remaining) are missing from this extraction.  */
1307 arm_target_help (void)
1310 static int columns
= 0;
1313 /* If we have not done so already, obtain the desired maximum width of
1314 the output. Note - this is a duplication of the code at the start of
1315 gcc/opts.c:print_specific_help() - the two copies should probably be
1316 replaced by a single function. */
1321 GET_ENVIRONMENT (p
, "COLUMNS");
1324 int value
= atoi (p
);
1331 /* Use a reasonable default. */
1335 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
1337 /* The - 2 is because we know that the last entry in the array is NULL. */
1338 i
= ARRAY_SIZE (all_cores
) - 2;
1340 printf (" %s", all_cores
[i
].name
);
1341 remaining
= columns
- (strlen (all_cores
[i
].name
) + 4);
1342 gcc_assert (remaining
>= 0);
1346 int len
= strlen (all_cores
[i
].name
);
1348 if (remaining
> len
+ 2)
1350 printf (", %s", all_cores
[i
].name
);
1351 remaining
-= len
+ 2;
/* Line overflow: start a fresh indented line.  */
1357 printf ("\n %s", all_cores
[i
].name
);
1358 remaining
= columns
- (len
+ 4);
/* Same layout logic repeated for the architectures table.  */
1362 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
1364 i
= ARRAY_SIZE (all_architectures
) - 2;
1367 printf (" %s", all_architectures
[i
].name
);
1368 remaining
= columns
- (strlen (all_architectures
[i
].name
) + 4);
1369 gcc_assert (remaining
>= 0);
1373 int len
= strlen (all_architectures
[i
].name
);
1375 if (remaining
> len
+ 2)
1377 printf (", %s", all_architectures
[i
].name
);
1378 remaining
-= len
+ 2;
1384 printf ("\n %s", all_architectures
[i
].name
);
1385 remaining
= columns
- (len
+ 4);
1392 /* Fix up any incompatible options that the user has specified. */
/* Implements TARGET_OPTION_OVERRIDE: resolves -march/-mcpu/-mtune into
   arm_selected_*, derives the insn/tune capability flag booleans used
   by arm.md, validates FPU / float-abi / fp16 / ABI / TLS / PIC
   options, and diagnoses contradictory combinations.
   NOTE(review): many structural lines (braces, if/else headers, case
   labels, `else`, locals such as `i` and `count`) were dropped by
   extraction throughout this function — verify against upstream.  */
1394 arm_option_override (void)
1398 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1399 SUBTARGET_OVERRIDE_OPTIONS
;
/* --- Resolve -march vs -mcpu --- */
1402 if (arm_selected_arch
)
1404 if (arm_selected_cpu
)
1406 /* Check for conflict between mcpu and march. */
1407 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1409 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1410 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1411 /* -march wins for code generation.
1412 -mcpu wins for default tuning. */
1413 if (!arm_selected_tune
)
1414 arm_selected_tune
= arm_selected_cpu
;
1416 arm_selected_cpu
= arm_selected_arch
;
1420 arm_selected_arch
= NULL
;
1423 /* Pick a CPU based on the architecture. */
1424 arm_selected_cpu
= arm_selected_arch
;
1427 /* If the user did not specify a processor, choose one for them. */
1428 if (!arm_selected_cpu
)
1430 const struct processors
* sel
;
1431 unsigned int sought
;
1433 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1434 if (!arm_selected_cpu
->name
)
1436 #ifdef SUBTARGET_CPU_DEFAULT
1437 /* Use the subtarget default CPU if none was specified by
1439 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1441 /* Default to ARM6. */
1442 if (!arm_selected_cpu
->name
)
1443 arm_selected_cpu
= &all_cores
[arm6
];
1446 sel
= arm_selected_cpu
;
1447 insn_flags
= sel
->flags
;
1449 /* Now check to see if the user has specified some command line
1450 switch that require certain abilities from the cpu. */
1453 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1455 sought
|= (FL_THUMB
| FL_MODE32
);
1457 /* There are no ARM processors that support both APCS-26 and
1458 interworking. Therefore we force FL_MODE26 to be removed
1459 from insn_flags here (if it was set), so that the search
1460 below will always be able to find a compatible processor. */
1461 insn_flags
&= ~FL_MODE26
;
1464 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1466 /* Try to locate a CPU type that supports all of the abilities
1467 of the default CPU, plus the extra abilities requested by
1469 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1470 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1473 if (sel
->name
== NULL
)
1475 unsigned current_bit_count
= 0;
1476 const struct processors
* best_fit
= NULL
;
1478 /* Ideally we would like to issue an error message here
1479 saying that it was not possible to find a CPU compatible
1480 with the default CPU, but which also supports the command
1481 line options specified by the programmer, and so they
1482 ought to use the -mcpu=<name> command line option to
1483 override the default CPU type.
1485 If we cannot find a cpu that has both the
1486 characteristics of the default cpu and the given
1487 command line options we scan the array again looking
1488 for a best match. */
1489 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1490 if ((sel
->flags
& sought
) == sought
)
1494 count
= bit_count (sel
->flags
& insn_flags
);
1496 if (count
>= current_bit_count
)
1499 current_bit_count
= count
;
1503 gcc_assert (best_fit
);
1507 arm_selected_cpu
= sel
;
1511 gcc_assert (arm_selected_cpu
);
1512 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1513 if (!arm_selected_tune
)
1514 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
/* Publish the architecture macro name and the derived flag sets.  */
1516 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1517 insn_flags
= arm_selected_cpu
->flags
;
1519 arm_tune
= arm_selected_tune
->core
;
1520 tune_flags
= arm_selected_tune
->flags
;
1521 current_tune
= arm_selected_tune
->tune
;
/* --- Validate -mfp16-format= --- */
1523 if (target_fp16_format_name
)
1525 for (i
= 0; i
< ARRAY_SIZE (all_fp16_formats
); i
++)
1527 if (streq (all_fp16_formats
[i
].name
, target_fp16_format_name
))
1529 arm_fp16_format
= all_fp16_formats
[i
].fp16_format_type
;
1533 if (i
== ARRAY_SIZE (all_fp16_formats
))
1534 error ("invalid __fp16 format option: -mfp16-format=%s",
1535 target_fp16_format_name
);
1538 arm_fp16_format
= ARM_FP16_FORMAT_NONE
;
/* --- Validate -mabi= --- */
1540 if (target_abi_name
)
1542 for (i
= 0; i
< ARRAY_SIZE (arm_all_abis
); i
++)
1544 if (streq (arm_all_abis
[i
].name
, target_abi_name
))
1546 arm_abi
= arm_all_abis
[i
].abi_type
;
1550 if (i
== ARRAY_SIZE (arm_all_abis
))
1551 error ("invalid ABI option: -mabi=%s", target_abi_name
);
1554 arm_abi
= ARM_DEFAULT_ABI
;
1556 /* Make sure that the processor choice does not conflict with any of the
1557 other command line choices. */
1558 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1559 error ("target CPU does not support ARM mode");
1561 /* BPABI targets use linker tricks to allow interworking on cores
1562 without thumb support. */
1563 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1565 warning (0, "target CPU does not support interworking" );
1566 target_flags
&= ~MASK_INTERWORK
;
1569 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1571 warning (0, "target CPU does not support THUMB instructions");
1572 target_flags
&= ~MASK_THUMB
;
1575 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1577 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1578 target_flags
&= ~MASK_APCS_FRAME
;
1581 /* Callee super interworking implies thumb interworking. Adding
1582 this to the flags here simplifies the logic elsewhere. */
1583 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1584 target_flags
|= MASK_INTERWORK
;
1586 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1587 from here where no function is being compiled currently. */
1588 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1589 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1591 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1592 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1594 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1596 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1597 target_flags
|= MASK_APCS_FRAME
;
1600 if (TARGET_POKE_FUNCTION_NAME
)
1601 target_flags
|= MASK_APCS_FRAME
;
1603 if (TARGET_APCS_REENT
&& flag_pic
)
1604 error ("-fpic and -mapcs-reent are incompatible");
1606 if (TARGET_APCS_REENT
)
1607 warning (0, "APCS reentrant code not supported. Ignored");
1609 /* If this target is normally configured to use APCS frames, warn if they
1610 are turned off and debugging is turned on. */
1612 && write_symbols
!= NO_DEBUG
1613 && !TARGET_APCS_FRAME
1614 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1615 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1617 if (TARGET_APCS_FLOAT
)
1618 warning (0, "passing floating point arguments in fp regs not yet supported");
1620 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1621 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1622 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1623 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1624 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1625 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1626 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1627 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1628 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1629 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
1630 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
1631 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1632 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1633 arm_arch_cirrus
= (insn_flags
& FL_CIRRUS
) != 0;
1635 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1636 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1637 thumb_code
= TARGET_ARM
== 0;
1638 thumb1_code
= TARGET_THUMB1
!= 0;
1639 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1640 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1641 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1642 arm_arch_hwdiv
= (insn_flags
& FL_DIV
) != 0;
1643 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
1645 /* If we are not using the default (ARM mode) section anchor offset
1646 ranges, then set the correct ranges now. */
1649 /* Thumb-1 LDR instructions cannot have negative offsets.
1650 Permissible positive offset ranges are 5-bit (for byte loads),
1651 6-bit (for halfword loads), or 7-bit (for word loads).
1652 Empirical results suggest a 7-bit anchor range gives the best
1653 overall code size. */
1654 targetm
.min_anchor_offset
= 0;
1655 targetm
.max_anchor_offset
= 127;
1657 else if (TARGET_THUMB2
)
1659 /* The minimum is set such that the total size of the block
1660 for a particular anchor is 248 + 1 + 4095 bytes, which is
1661 divisible by eight, ensuring natural spacing of anchors. */
1662 targetm
.min_anchor_offset
= -248;
1663 targetm
.max_anchor_offset
= 4095;
1666 /* V5 code we generate is completely interworking capable, so we turn off
1667 TARGET_INTERWORK here to avoid many tests later on. */
1669 /* XXX However, we must pass the right pre-processor defines to CPP
1670 or GLD can get confused. This is a hack. */
1671 if (TARGET_INTERWORK
)
1672 arm_cpp_interwork
= 1;
1675 target_flags
&= ~MASK_INTERWORK
;
1677 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1678 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1680 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1681 error ("iwmmxt abi requires an iwmmxt capable cpu");
/* --- Resolve -mfpu= / -mfpe= --- */
1683 if (target_fpu_name
== NULL
&& target_fpe_name
!= NULL
)
1685 if (streq (target_fpe_name
, "2"))
1686 target_fpu_name
= "fpe2";
1687 else if (streq (target_fpe_name
, "3"))
1688 target_fpu_name
= "fpe3";
1690 error ("invalid floating point emulation option: -mfpe=%s",
1694 if (target_fpu_name
== NULL
)
1696 #ifdef FPUTYPE_DEFAULT
1697 target_fpu_name
= FPUTYPE_DEFAULT
;
1699 if (arm_arch_cirrus
)
1700 target_fpu_name
= "maverick";
1702 target_fpu_name
= "fpe2";
1706 arm_fpu_desc
= NULL
;
1707 for (i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
1709 if (streq (all_fpus
[i
].name
, target_fpu_name
))
1711 arm_fpu_desc
= &all_fpus
[i
];
1718 error ("invalid floating point option: -mfpu=%s", target_fpu_name
);
1722 switch (arm_fpu_desc
->model
)
1724 case ARM_FP_MODEL_FPA
:
1725 if (arm_fpu_desc
->rev
== 2)
1726 arm_fpu_attr
= FPU_FPE2
;
1727 else if (arm_fpu_desc
->rev
== 3)
1728 arm_fpu_attr
= FPU_FPE3
;
1730 arm_fpu_attr
= FPU_FPA
;
1733 case ARM_FP_MODEL_MAVERICK
:
1734 arm_fpu_attr
= FPU_MAVERICK
;
1737 case ARM_FP_MODEL_VFP
:
1738 arm_fpu_attr
= FPU_VFP
;
/* --- Resolve -mfloat-abi= --- */
1745 if (target_float_abi_name
!= NULL
)
1747 /* The user specified a FP ABI. */
1748 for (i
= 0; i
< ARRAY_SIZE (all_float_abis
); i
++)
1750 if (streq (all_float_abis
[i
].name
, target_float_abi_name
))
1752 arm_float_abi
= all_float_abis
[i
].abi_type
;
1756 if (i
== ARRAY_SIZE (all_float_abis
))
1757 error ("invalid floating point abi: -mfloat-abi=%s",
1758 target_float_abi_name
);
1761 arm_float_abi
= TARGET_DEFAULT_FLOAT_ABI
;
1763 if (TARGET_AAPCS_BASED
1764 && (arm_fpu_desc
->model
== ARM_FP_MODEL_FPA
))
1765 error ("FPA is unsupported in the AAPCS");
1767 if (TARGET_AAPCS_BASED
)
1769 if (TARGET_CALLER_INTERWORKING
)
1770 error ("AAPCS does not support -mcaller-super-interworking");
1772 if (TARGET_CALLEE_INTERWORKING
)
1773 error ("AAPCS does not support -mcallee-super-interworking");
1776 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1777 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1778 will ever exist. GCC makes no attempt to support this combination. */
1779 if (TARGET_IWMMXT
&& !TARGET_SOFT_FLOAT
)
1780 sorry ("iWMMXt and hardware floating point");
1782 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1783 if (TARGET_THUMB2
&& TARGET_IWMMXT
)
1784 sorry ("Thumb-2 iWMMXt");
1786 /* __fp16 support currently assumes the core has ldrh. */
1787 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1788 sorry ("__fp16 and no ldrh");
1790 /* If soft-float is specified then don't use FPU. */
1791 if (TARGET_SOFT_FLOAT
)
1792 arm_fpu_attr
= FPU_NONE
;
/* --- Choose the default procedure call standard --- */
1794 if (TARGET_AAPCS_BASED
)
1796 if (arm_abi
== ARM_ABI_IWMMXT
)
1797 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
1798 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
1799 && TARGET_HARD_FLOAT
1801 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
1803 arm_pcs_default
= ARM_PCS_AAPCS
;
1807 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1808 sorry ("-mfloat-abi=hard and VFP");
1810 if (arm_abi
== ARM_ABI_APCS
)
1811 arm_pcs_default
= ARM_PCS_APCS
;
1813 arm_pcs_default
= ARM_PCS_ATPCS
;
1816 /* For arm2/3 there is no need to do any scheduling if there is only
1817 a floating point emulator, or we are doing software floating-point. */
1818 if ((TARGET_SOFT_FLOAT
1819 || (TARGET_FPA
&& arm_fpu_desc
->rev
))
1820 && (tune_flags
& FL_MODE32
) == 0)
1821 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
/* --- Resolve -mtp= (thread pointer access method) --- */
1823 if (target_thread_switch
)
1825 if (strcmp (target_thread_switch
, "soft") == 0)
1826 target_thread_pointer
= TP_SOFT
;
1827 else if (strcmp (target_thread_switch
, "auto") == 0)
1828 target_thread_pointer
= TP_AUTO
;
1829 else if (strcmp (target_thread_switch
, "cp15") == 0)
1830 target_thread_pointer
= TP_CP15
;
1832 error ("invalid thread pointer option: -mtp=%s", target_thread_switch
);
1835 /* Use the cp15 method if it is available. */
1836 if (target_thread_pointer
== TP_AUTO
)
1838 if (arm_arch6k
&& !TARGET_THUMB1
)
1839 target_thread_pointer
= TP_CP15
;
1841 target_thread_pointer
= TP_SOFT
;
1844 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1845 error ("can not use -mtp=cp15 with 16-bit Thumb");
1847 /* Override the default structure alignment for AAPCS ABI. */
1848 if (TARGET_AAPCS_BASED
)
1849 arm_structure_size_boundary
= 8;
1851 if (structure_size_string
!= NULL
)
1853 int size
= strtol (structure_size_string
, NULL
, 0);
1855 if (size
== 8 || size
== 32
1856 || (ARM_DOUBLEWORD_ALIGN
&& size
== 64))
1857 arm_structure_size_boundary
= size
;
1859 warning (0, "structure size boundary can only be set to %s",
1860 ARM_DOUBLEWORD_ALIGN
? "8, 32 or 64": "8 or 32");
1863 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1865 error ("RTP PIC is incompatible with Thumb");
/* --- PIC register selection --- */
1869 /* If stack checking is disabled, we can use r10 as the PIC register,
1870 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1871 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1873 if (TARGET_VXWORKS_RTP
)
1874 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1875 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1878 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1879 arm_pic_register
= 9;
1881 if (arm_pic_register_string
!= NULL
)
1883 int pic_register
= decode_reg_name (arm_pic_register_string
);
1886 warning (0, "-mpic-register= is useless without -fpic");
1888 /* Prevent the user from choosing an obviously stupid PIC register. */
1889 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1890 || pic_register
== HARD_FRAME_POINTER_REGNUM
1891 || pic_register
== STACK_POINTER_REGNUM
1892 || pic_register
>= PC_REGNUM
1893 || (TARGET_VXWORKS_RTP
1894 && (unsigned int) pic_register
!= arm_pic_register
))
1895 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1897 arm_pic_register
= pic_register
;
1900 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1901 if (fix_cm3_ldrd
== 2)
1903 if (arm_selected_cpu
->core
== cortexm3
)
1909 if (TARGET_THUMB1
&& flag_schedule_insns
)
1911 /* Don't warn since it's on by default in -O2. */
1912 flag_schedule_insns
= 0;
1917 /* If optimizing for size, bump the number of instructions that we
1918 are prepared to conditionally execute (even on a StrongARM). */
1919 max_insns_skipped
= 6;
1923 /* StrongARM has early execution of branches, so a sequence
1924 that is worth skipping is shorter. */
1925 if (arm_tune_strongarm
)
1926 max_insns_skipped
= 3;
1929 /* Hot/Cold partitioning is not currently supported, since we can't
1930 handle literal pool placement in that case. */
1931 if (flag_reorder_blocks_and_partition
)
1933 inform (input_location
,
1934 "-freorder-blocks-and-partition not supported on this architecture");
1935 flag_reorder_blocks_and_partition
= 0;
1936 flag_reorder_blocks
= 1;
1939 if (!PARAM_SET_P (PARAM_GCSE_UNRESTRICTED_COST
)
1941 /* Hoisting PIC address calculations more aggressively provides a small,
1942 but measurable, size reduction for PIC code. Therefore, we decrease
1943 the bar for unrestricted expression hoisting to the cost of PIC address
1944 calculation, which is 2 instructions. */
1945 set_param_value ("gcse-unrestricted-cost", 2);
1947 /* Register global variables with the garbage collector. */
1948 arm_add_gc_roots ();
1952 arm_add_gc_roots (void)
1954 gcc_obstack_init(&minipool_obstack
);
1955 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
1958 /* A table of known ARM exception types.
1959 For use with the interrupt function attribute. */
1963 const char *const arg
;
1964 const unsigned long return_value
;
1968 static const isr_attribute_arg isr_attribute_args
[] =
1970 { "IRQ", ARM_FT_ISR
},
1971 { "irq", ARM_FT_ISR
},
1972 { "FIQ", ARM_FT_FIQ
},
1973 { "fiq", ARM_FT_FIQ
},
1974 { "ABORT", ARM_FT_ISR
},
1975 { "abort", ARM_FT_ISR
},
1976 { "ABORT", ARM_FT_ISR
},
1977 { "abort", ARM_FT_ISR
},
1978 { "UNDEF", ARM_FT_EXCEPTION
},
1979 { "undef", ARM_FT_EXCEPTION
},
1980 { "SWI", ARM_FT_EXCEPTION
},
1981 { "swi", ARM_FT_EXCEPTION
},
1982 { NULL
, ARM_FT_NORMAL
}
1985 /* Returns the (interrupt) function type of the current
1986 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1988 static unsigned long
1989 arm_isr_value (tree argument
)
1991 const isr_attribute_arg
* ptr
;
1995 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
1997 /* No argument - default to IRQ. */
1998 if (argument
== NULL_TREE
)
2001 /* Get the value of the argument. */
2002 if (TREE_VALUE (argument
) == NULL_TREE
2003 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2004 return ARM_FT_UNKNOWN
;
2006 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2008 /* Check it against the list of known arguments. */
2009 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2010 if (streq (arg
, ptr
->arg
))
2011 return ptr
->return_value
;
2013 /* An unrecognized interrupt type. */
2014 return ARM_FT_UNKNOWN
;
2017 /* Computes the type of the current function. */
2019 static unsigned long
2020 arm_compute_func_type (void)
2022 unsigned long type
= ARM_FT_UNKNOWN
;
2026 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2028 /* Decide if the current function is volatile. Such functions
2029 never return, and many memory cycles can be saved by not storing
2030 register values that will never be needed again. This optimization
2031 was added to speed up context switching in a kernel application. */
2033 && (TREE_NOTHROW (current_function_decl
)
2034 || !(flag_unwind_tables
2035 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
)))
2036 && TREE_THIS_VOLATILE (current_function_decl
))
2037 type
|= ARM_FT_VOLATILE
;
2039 if (cfun
->static_chain_decl
!= NULL
)
2040 type
|= ARM_FT_NESTED
;
2042 attr
= DECL_ATTRIBUTES (current_function_decl
);
2044 a
= lookup_attribute ("naked", attr
);
2046 type
|= ARM_FT_NAKED
;
2048 a
= lookup_attribute ("isr", attr
);
2050 a
= lookup_attribute ("interrupt", attr
);
2053 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2055 type
|= arm_isr_value (TREE_VALUE (a
));
2060 /* Returns the type of the current function. */
2063 arm_current_func_type (void)
2065 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2066 cfun
->machine
->func_type
= arm_compute_func_type ();
2068 return cfun
->machine
->func_type
;
2072 arm_allocate_stack_slots_for_args (void)
2074 /* Naked functions should not allocate stack slots for arguments. */
2075 return !IS_NAKED (arm_current_func_type ());
2079 /* Output assembler code for a block containing the constant parts
2080 of a trampoline, leaving space for the variable parts.
2082 On the ARM, (if r8 is the static chain regnum, and remembering that
2083 referencing pc adds an offset of 8) the trampoline looks like:
2086 .word static chain value
2087 .word function's address
2088 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2091 arm_asm_trampoline_template (FILE *f
)
2095 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2096 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2098 else if (TARGET_THUMB2
)
2100 /* The Thumb-2 trampoline is similar to the arm implementation.
2101 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2102 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2103 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2104 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2108 ASM_OUTPUT_ALIGN (f
, 2);
2109 fprintf (f
, "\t.code\t16\n");
2110 fprintf (f
, ".Ltrampoline_start:\n");
2111 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2112 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2113 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2114 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2115 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2116 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2118 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2119 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2122 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2125 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
2127 rtx fnaddr
, mem
, a_tramp
;
2129 emit_block_move (m_tramp
, assemble_trampoline_template (),
2130 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
2132 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
2133 emit_move_insn (mem
, chain_value
);
2135 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
2136 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
2137 emit_move_insn (mem
, fnaddr
);
2139 a_tramp
= XEXP (m_tramp
, 0);
2140 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
2141 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
2142 plus_constant (a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
2145 /* Thumb trampolines should be entered in thumb mode, so set
2146 the bottom bit of the address. */
2149 arm_trampoline_adjust_address (rtx addr
)
2152 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
2153 NULL
, 0, OPTAB_LIB_WIDEN
);
2157 /* Return 1 if it is possible to return using a single instruction.
2158 If SIBLING is non-null, this is a test for a return before a sibling
2159 call. SIBLING is the call insn, so we can examine its register usage. */
2162 use_return_insn (int iscond
, rtx sibling
)
2165 unsigned int func_type
;
2166 unsigned long saved_int_regs
;
2167 unsigned HOST_WIDE_INT stack_adjust
;
2168 arm_stack_offsets
*offsets
;
2170 /* Never use a return instruction before reload has run. */
2171 if (!reload_completed
)
2174 func_type
= arm_current_func_type ();
2176 /* Naked, volatile and stack alignment functions need special
2178 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2181 /* So do interrupt functions that use the frame pointer and Thumb
2182 interrupt functions. */
2183 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2186 offsets
= arm_get_frame_offsets ();
2187 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2189 /* As do variadic functions. */
2190 if (crtl
->args
.pretend_args_size
2191 || cfun
->machine
->uses_anonymous_args
2192 /* Or if the function calls __builtin_eh_return () */
2193 || crtl
->calls_eh_return
2194 /* Or if the function calls alloca */
2195 || cfun
->calls_alloca
2196 /* Or if there is a stack adjustment. However, if the stack pointer
2197 is saved on the stack, we can use a pre-incrementing stack load. */
2198 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2199 && stack_adjust
== 4)))
2202 saved_int_regs
= offsets
->saved_regs_mask
;
2204 /* Unfortunately, the insn
2206 ldmib sp, {..., sp, ...}
2208 triggers a bug on most SA-110 based devices, such that the stack
2209 pointer won't be correctly restored if the instruction takes a
2210 page fault. We work around this problem by popping r3 along with
2211 the other registers, since that is never slower than executing
2212 another instruction.
2214 We test for !arm_arch5 here, because code for any architecture
2215 less than this could potentially be run on one of the buggy
2217 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2219 /* Validate that r3 is a call-clobbered register (always true in
2220 the default abi) ... */
2221 if (!call_used_regs
[3])
2224 /* ... that it isn't being used for a return value ... */
2225 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2228 /* ... or for a tail-call argument ... */
2231 gcc_assert (GET_CODE (sibling
) == CALL_INSN
);
2233 if (find_regno_fusage (sibling
, USE
, 3))
2237 /* ... and that there are no call-saved registers in r0-r2
2238 (always true in the default ABI). */
2239 if (saved_int_regs
& 0x7)
2243 /* Can't be done if interworking with Thumb, and any registers have been
2245 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2248 /* On StrongARM, conditional returns are expensive if they aren't
2249 taken and multiple registers have been stacked. */
2250 if (iscond
&& arm_tune_strongarm
)
2252 /* Conditional return when just the LR is stored is a simple
2253 conditional-load instruction, that's not expensive. */
2254 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2258 && arm_pic_register
!= INVALID_REGNUM
2259 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2263 /* If there are saved registers but the LR isn't saved, then we need
2264 two instructions for the return. */
2265 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2268 /* Can't be done if any of the FPA regs are pushed,
2269 since this also requires an insn. */
2270 if (TARGET_HARD_FLOAT
&& TARGET_FPA
)
2271 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
2272 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2275 /* Likewise VFP regs. */
2276 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2277 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2278 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2281 if (TARGET_REALLY_IWMMXT
)
2282 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2283 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2289 /* Return TRUE if int I is a valid immediate ARM constant. */
2292 const_ok_for_arm (HOST_WIDE_INT i
)
2296 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2297 be all zero, or all one. */
2298 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2299 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2300 != ((~(unsigned HOST_WIDE_INT
) 0)
2301 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
2304 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2306 /* Fast return for 0 and small values. We must do this for zero, since
2307 the code below can't handle that one case. */
2308 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2311 /* Get the number of trailing zeros. */
2312 lowbit
= ffs((int) i
) - 1;
2314 /* Only even shifts are allowed in ARM mode so round down to the
2315 nearest even number. */
2319 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2324 /* Allow rotated constants in ARM mode. */
2326 && ((i
& ~0xc000003f) == 0
2327 || (i
& ~0xf000000f) == 0
2328 || (i
& ~0xfc000003) == 0))
2335 /* Allow repeated pattern. */
2338 if (i
== v
|| i
== (v
| (v
<< 8)))
2345 /* Return true if I is a valid constant for the operation CODE. */
2347 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
2349 if (const_ok_for_arm (i
))
2373 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2375 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
2381 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2385 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2392 /* Emit a sequence of insns to handle a large constant.
2393 CODE is the code of the operation required, it can be any of SET, PLUS,
2394 IOR, AND, XOR, MINUS;
2395 MODE is the mode in which the operation is being performed;
2396 VAL is the integer to operate on;
2397 SOURCE is the other operand (a register, or a null-pointer for SET);
2398 SUBTARGETS means it is safe to create scratch registers if that will
2399 either produce a simpler sequence, or we will want to cse the values.
2400 Return value is the number of insns emitted. */
2402 /* ??? Tweak this for thumb2. */
2404 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2405 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2409 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2410 cond
= COND_EXEC_TEST (PATTERN (insn
));
2414 if (subtargets
|| code
== SET
2415 || (GET_CODE (target
) == REG
&& GET_CODE (source
) == REG
2416 && REGNO (target
) != REGNO (source
)))
2418 /* After arm_reorg has been called, we can't fix up expensive
2419 constants by pushing them into memory so we must synthesize
2420 them in-line, regardless of the cost. This is only likely to
2421 be more costly on chips that have load delay slots and we are
2422 compiling without running the scheduler (so no splitting
2423 occurred before the final instruction emission).
2425 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2427 if (!after_arm_reorg
2429 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2431 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2436 /* Currently SET is the only monadic value for CODE, all
2437 the rest are diadic. */
2438 if (TARGET_USE_MOVT
)
2439 arm_emit_movpair (target
, GEN_INT (val
));
2441 emit_set_insn (target
, GEN_INT (val
));
2447 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2449 if (TARGET_USE_MOVT
)
2450 arm_emit_movpair (temp
, GEN_INT (val
));
2452 emit_set_insn (temp
, GEN_INT (val
));
2454 /* For MINUS, the value is subtracted from, since we never
2455 have subtraction of a constant. */
2457 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2459 emit_set_insn (target
,
2460 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2466 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2470 /* Return the number of instructions required to synthesize the given
2471 constant, if we start emitting them from bit-position I. */
2473 count_insns_for_constant (HOST_WIDE_INT remainder
, int i
)
2475 HOST_WIDE_INT temp1
;
2476 int step_size
= TARGET_ARM
? 2 : 1;
2479 gcc_assert (TARGET_ARM
|| i
== 0);
2487 if (remainder
& (((1 << step_size
) - 1) << (i
- step_size
)))
2492 temp1
= remainder
& ((0x0ff << end
)
2493 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
2494 remainder
&= ~temp1
;
2499 } while (remainder
);
2504 find_best_start (unsigned HOST_WIDE_INT remainder
)
2506 int best_consecutive_zeros
= 0;
2510 /* If we aren't targetting ARM, the best place to start is always at
2515 for (i
= 0; i
< 32; i
+= 2)
2517 int consecutive_zeros
= 0;
2519 if (!(remainder
& (3 << i
)))
2521 while ((i
< 32) && !(remainder
& (3 << i
)))
2523 consecutive_zeros
+= 2;
2526 if (consecutive_zeros
> best_consecutive_zeros
)
2528 best_consecutive_zeros
= consecutive_zeros
;
2529 best_start
= i
- consecutive_zeros
;
2535 /* So long as it won't require any more insns to do so, it's
2536 desirable to emit a small constant (in bits 0...9) in the last
2537 insn. This way there is more chance that it can be combined with
2538 a later addressing insn to form a pre-indexed load or store
2539 operation. Consider:
2541 *((volatile int *)0xe0000100) = 1;
2542 *((volatile int *)0xe0000110) = 2;
2544 We want this to wind up as:
2548 str rB, [rA, #0x100]
2550 str rB, [rA, #0x110]
2552 rather than having to synthesize both large constants from scratch.
2554 Therefore, we calculate how many insns would be required to emit
2555 the constant starting from `best_start', and also starting from
2556 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2557 yield a shorter sequence, we may as well use zero. */
2559 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < remainder
)
2560 && (count_insns_for_constant (remainder
, 0) <=
2561 count_insns_for_constant (remainder
, best_start
)))
2567 /* Emit an instruction with the indicated PATTERN. If COND is
2568 non-NULL, conditionalize the execution of the instruction on COND
2572 emit_constant_insn (rtx cond
, rtx pattern
)
2575 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2576 emit_insn (pattern
);
2579 /* As above, but extra parameter GENERATE which, if clear, suppresses
2581 /* ??? This needs more work for thumb2. */
2584 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2585 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2590 int final_invert
= 0;
2591 int can_negate_initial
= 0;
2593 int num_bits_set
= 0;
2594 int set_sign_bit_copies
= 0;
2595 int clear_sign_bit_copies
= 0;
2596 int clear_zero_bit_copies
= 0;
2597 int set_zero_bit_copies
= 0;
2599 unsigned HOST_WIDE_INT temp1
, temp2
;
2600 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2601 int step_size
= TARGET_ARM
? 2 : 1;
2603 /* Find out which operations are safe for a given CODE. Also do a quick
2604 check for degenerate cases; these can occur when DImode operations
2615 can_negate_initial
= 1;
2619 if (remainder
== 0xffffffff)
2622 emit_constant_insn (cond
,
2623 gen_rtx_SET (VOIDmode
, target
,
2624 GEN_INT (ARM_SIGN_EXTEND (val
))));
2630 if (reload_completed
&& rtx_equal_p (target
, source
))
2634 emit_constant_insn (cond
,
2635 gen_rtx_SET (VOIDmode
, target
, source
));
2647 emit_constant_insn (cond
,
2648 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2651 if (remainder
== 0xffffffff)
2653 if (reload_completed
&& rtx_equal_p (target
, source
))
2656 emit_constant_insn (cond
,
2657 gen_rtx_SET (VOIDmode
, target
, source
));
2666 if (reload_completed
&& rtx_equal_p (target
, source
))
2669 emit_constant_insn (cond
,
2670 gen_rtx_SET (VOIDmode
, target
, source
));
2674 if (remainder
== 0xffffffff)
2677 emit_constant_insn (cond
,
2678 gen_rtx_SET (VOIDmode
, target
,
2679 gen_rtx_NOT (mode
, source
)));
2685 /* We treat MINUS as (val - source), since (source - val) is always
2686 passed as (source + (-val)). */
2690 emit_constant_insn (cond
,
2691 gen_rtx_SET (VOIDmode
, target
,
2692 gen_rtx_NEG (mode
, source
)));
2695 if (const_ok_for_arm (val
))
2698 emit_constant_insn (cond
,
2699 gen_rtx_SET (VOIDmode
, target
,
2700 gen_rtx_MINUS (mode
, GEN_INT (val
),
2712 /* If we can do it in one insn get out quickly. */
2713 if (const_ok_for_arm (val
)
2714 || (can_negate_initial
&& const_ok_for_arm (-val
))
2715 || (can_invert
&& const_ok_for_arm (~val
)))
2718 emit_constant_insn (cond
,
2719 gen_rtx_SET (VOIDmode
, target
,
2721 ? gen_rtx_fmt_ee (code
, mode
, source
,
2727 /* Calculate a few attributes that may be useful for specific
2729 /* Count number of leading zeros. */
2730 for (i
= 31; i
>= 0; i
--)
2732 if ((remainder
& (1 << i
)) == 0)
2733 clear_sign_bit_copies
++;
2738 /* Count number of leading 1's. */
2739 for (i
= 31; i
>= 0; i
--)
2741 if ((remainder
& (1 << i
)) != 0)
2742 set_sign_bit_copies
++;
2747 /* Count number of trailing zero's. */
2748 for (i
= 0; i
<= 31; i
++)
2750 if ((remainder
& (1 << i
)) == 0)
2751 clear_zero_bit_copies
++;
2756 /* Count number of trailing 1's. */
2757 for (i
= 0; i
<= 31; i
++)
2759 if ((remainder
& (1 << i
)) != 0)
2760 set_zero_bit_copies
++;
2768 /* See if we can use movw. */
2769 if (arm_arch_thumb2
&& (remainder
& 0xffff0000) == 0)
2772 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
2777 /* See if we can do this by sign_extending a constant that is known
2778 to be negative. This is a good, way of doing it, since the shift
2779 may well merge into a subsequent insn. */
2780 if (set_sign_bit_copies
> 1)
2782 if (const_ok_for_arm
2783 (temp1
= ARM_SIGN_EXTEND (remainder
2784 << (set_sign_bit_copies
- 1))))
2788 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2789 emit_constant_insn (cond
,
2790 gen_rtx_SET (VOIDmode
, new_src
,
2792 emit_constant_insn (cond
,
2793 gen_ashrsi3 (target
, new_src
,
2794 GEN_INT (set_sign_bit_copies
- 1)));
2798 /* For an inverted constant, we will need to set the low bits,
2799 these will be shifted out of harm's way. */
2800 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2801 if (const_ok_for_arm (~temp1
))
2805 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2806 emit_constant_insn (cond
,
2807 gen_rtx_SET (VOIDmode
, new_src
,
2809 emit_constant_insn (cond
,
2810 gen_ashrsi3 (target
, new_src
,
2811 GEN_INT (set_sign_bit_copies
- 1)));
2817 /* See if we can calculate the value as the difference between two
2818 valid immediates. */
2819 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2821 int topshift
= clear_sign_bit_copies
& ~1;
2823 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2824 & (0xff000000 >> topshift
));
2826 /* If temp1 is zero, then that means the 9 most significant
2827 bits of remainder were 1 and we've caused it to overflow.
2828 When topshift is 0 we don't need to do anything since we
2829 can borrow from 'bit 32'. */
2830 if (temp1
== 0 && topshift
!= 0)
2831 temp1
= 0x80000000 >> (topshift
- 1);
2833 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2835 if (const_ok_for_arm (temp2
))
2839 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2840 emit_constant_insn (cond
,
2841 gen_rtx_SET (VOIDmode
, new_src
,
2843 emit_constant_insn (cond
,
2844 gen_addsi3 (target
, new_src
,
2852 /* See if we can generate this by setting the bottom (or the top)
2853 16 bits, and then shifting these into the other half of the
2854 word. We only look for the simplest cases, to do more would cost
2855 too much. Be careful, however, not to generate this when the
2856 alternative would take fewer insns. */
2857 if (val
& 0xffff0000)
2859 temp1
= remainder
& 0xffff0000;
2860 temp2
= remainder
& 0x0000ffff;
2862 /* Overlaps outside this range are best done using other methods. */
2863 for (i
= 9; i
< 24; i
++)
2865 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2866 && !const_ok_for_arm (temp2
))
2868 rtx new_src
= (subtargets
2869 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2871 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2872 source
, subtargets
, generate
);
2880 gen_rtx_ASHIFT (mode
, source
,
2887 /* Don't duplicate cases already considered. */
2888 for (i
= 17; i
< 24; i
++)
2890 if (((temp1
| (temp1
>> i
)) == remainder
)
2891 && !const_ok_for_arm (temp1
))
2893 rtx new_src
= (subtargets
2894 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2896 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2897 source
, subtargets
, generate
);
2902 gen_rtx_SET (VOIDmode
, target
,
2905 gen_rtx_LSHIFTRT (mode
, source
,
2916 /* If we have IOR or XOR, and the constant can be loaded in a
2917 single instruction, and we can find a temporary to put it in,
2918 then this can be done in two instructions instead of 3-4. */
2920 /* TARGET can't be NULL if SUBTARGETS is 0 */
2921 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2923 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2927 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2929 emit_constant_insn (cond
,
2930 gen_rtx_SET (VOIDmode
, sub
,
2932 emit_constant_insn (cond
,
2933 gen_rtx_SET (VOIDmode
, target
,
2934 gen_rtx_fmt_ee (code
, mode
,
2945 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2946 and the remainder 0s for e.g. 0xfff00000)
2947 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2949 This can be done in 2 instructions by using shifts with mov or mvn.
2954 mvn r0, r0, lsr #12 */
2955 if (set_sign_bit_copies
> 8
2956 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2960 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2961 rtx shift
= GEN_INT (set_sign_bit_copies
);
2965 gen_rtx_SET (VOIDmode
, sub
,
2967 gen_rtx_ASHIFT (mode
,
2972 gen_rtx_SET (VOIDmode
, target
,
2974 gen_rtx_LSHIFTRT (mode
, sub
,
2981 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2983 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2985 For eg. r0 = r0 | 0xfff
2990 if (set_zero_bit_copies
> 8
2991 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
2995 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2996 rtx shift
= GEN_INT (set_zero_bit_copies
);
3000 gen_rtx_SET (VOIDmode
, sub
,
3002 gen_rtx_LSHIFTRT (mode
,
3007 gen_rtx_SET (VOIDmode
, target
,
3009 gen_rtx_ASHIFT (mode
, sub
,
3015 /* This will never be reached for Thumb2 because orn is a valid
3016 instruction. This is for Thumb1 and the ARM 32 bit cases.
3018 x = y | constant (such that ~constant is a valid constant)
3020 x = ~(~y & ~constant).
3022 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
3026 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3027 emit_constant_insn (cond
,
3028 gen_rtx_SET (VOIDmode
, sub
,
3029 gen_rtx_NOT (mode
, source
)));
3032 sub
= gen_reg_rtx (mode
);
3033 emit_constant_insn (cond
,
3034 gen_rtx_SET (VOIDmode
, sub
,
3035 gen_rtx_AND (mode
, source
,
3037 emit_constant_insn (cond
,
3038 gen_rtx_SET (VOIDmode
, target
,
3039 gen_rtx_NOT (mode
, sub
)));
3046 /* See if two shifts will do 2 or more insn's worth of work. */
3047 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
3049 HOST_WIDE_INT shift_mask
= ((0xffffffff
3050 << (32 - clear_sign_bit_copies
))
3053 if ((remainder
| shift_mask
) != 0xffffffff)
3057 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3058 insns
= arm_gen_constant (AND
, mode
, cond
,
3059 remainder
| shift_mask
,
3060 new_src
, source
, subtargets
, 1);
3065 rtx targ
= subtargets
? NULL_RTX
: target
;
3066 insns
= arm_gen_constant (AND
, mode
, cond
,
3067 remainder
| shift_mask
,
3068 targ
, source
, subtargets
, 0);
3074 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3075 rtx shift
= GEN_INT (clear_sign_bit_copies
);
3077 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
3078 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3084 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3086 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3088 if ((remainder
| shift_mask
) != 0xffffffff)
3092 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3094 insns
= arm_gen_constant (AND
, mode
, cond
,
3095 remainder
| shift_mask
,
3096 new_src
, source
, subtargets
, 1);
3101 rtx targ
= subtargets
? NULL_RTX
: target
;
3103 insns
= arm_gen_constant (AND
, mode
, cond
,
3104 remainder
| shift_mask
,
3105 targ
, source
, subtargets
, 0);
3111 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3112 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3114 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3115 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3127 for (i
= 0; i
< 32; i
++)
3128 if (remainder
& (1 << i
))
3132 || (code
!= IOR
&& can_invert
&& num_bits_set
> 16))
3133 remainder
^= 0xffffffff;
3134 else if (code
== PLUS
&& num_bits_set
> 16)
3135 remainder
= (-remainder
) & 0xffffffff;
3137 /* For XOR, if more than half the bits are set and there's a sequence
3138 of more than 8 consecutive ones in the pattern then we can XOR by the
3139 inverted constant and then invert the final result; this may save an
3140 instruction and might also lead to the final mvn being merged with
3141 some other operation. */
3142 else if (code
== XOR
&& num_bits_set
> 16
3143 && (count_insns_for_constant (remainder
^ 0xffffffff,
3145 (remainder
^ 0xffffffff))
3146 < count_insns_for_constant (remainder
,
3147 find_best_start (remainder
))))
3149 remainder
^= 0xffffffff;
3158 /* Now try and find a way of doing the job in either two or three
3160 We start by looking for the largest block of zeros that are aligned on
3161 a 2-bit boundary, we then fill up the temps, wrapping around to the
3162 top of the word when we drop off the bottom.
3163 In the worst case this code should produce no more than four insns.
3164 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3165 best place to start. */
3167 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3170 /* Now start emitting the insns. */
3171 i
= find_best_start (remainder
);
3178 if (remainder
& (3 << (i
- 2)))
3183 temp1
= remainder
& ((0x0ff << end
)
3184 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
3185 remainder
&= ~temp1
;
3189 rtx new_src
, temp1_rtx
;
3191 if (code
== SET
|| code
== MINUS
)
3193 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3194 if (can_invert
&& code
!= MINUS
)
3199 if ((final_invert
|| remainder
) && subtargets
)
3200 new_src
= gen_reg_rtx (mode
);
3205 else if (can_negate
)
3209 temp1
= trunc_int_for_mode (temp1
, mode
);
3210 temp1_rtx
= GEN_INT (temp1
);
3214 else if (code
== MINUS
)
3215 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3217 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3219 emit_constant_insn (cond
,
3220 gen_rtx_SET (VOIDmode
, new_src
,
3230 else if (code
== MINUS
)
3236 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3246 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3247 gen_rtx_NOT (mode
, source
)));
3254 /* Canonicalize a comparison so that we are more likely to recognize it.
3255 This can be done for a few constant compares, where we can make the
3256 immediate value easier to load. */
3259 arm_canonicalize_comparison (enum rtx_code code
, rtx
*op0
, rtx
*op1
)
3261 enum machine_mode mode
;
3262 unsigned HOST_WIDE_INT i
, maxval
;
3264 mode
= GET_MODE (*op0
);
3265 if (mode
== VOIDmode
)
3266 mode
= GET_MODE (*op1
);
3268 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3270 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3271 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3272 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3273 for GTU/LEU in Thumb mode. */
3278 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3280 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
3283 if (code
== GT
|| code
== LE
3284 || (!TARGET_ARM
&& (code
== GTU
|| code
== LEU
)))
3286 /* Missing comparison. First try to use an available
3288 if (GET_CODE (*op1
) == CONST_INT
)
3296 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3298 *op1
= GEN_INT (i
+ 1);
3299 return code
== GT
? GE
: LT
;
3304 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3305 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3307 *op1
= GEN_INT (i
+ 1);
3308 return code
== GTU
? GEU
: LTU
;
3316 /* If that did not work, reverse the condition. */
3320 return swap_condition (code
);
3326 /* Comparisons smaller than DImode. Only adjust comparisons against
3327 an out-of-range constant. */
3328 if (GET_CODE (*op1
) != CONST_INT
3329 || const_ok_for_arm (INTVAL (*op1
))
3330 || const_ok_for_arm (- INTVAL (*op1
)))
3344 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3346 *op1
= GEN_INT (i
+ 1);
3347 return code
== GT
? GE
: LT
;
3354 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3356 *op1
= GEN_INT (i
- 1);
3357 return code
== GE
? GT
: LE
;
3363 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3364 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3366 *op1
= GEN_INT (i
+ 1);
3367 return code
== GTU
? GEU
: LTU
;
3374 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3376 *op1
= GEN_INT (i
- 1);
3377 return code
== GEU
? GTU
: LEU
;
3389 /* Define how to find the value returned by a function. */
3392 arm_function_value(const_tree type
, const_tree func
,
3393 bool outgoing ATTRIBUTE_UNUSED
)
3395 enum machine_mode mode
;
3396 int unsignedp ATTRIBUTE_UNUSED
;
3397 rtx r ATTRIBUTE_UNUSED
;
3399 mode
= TYPE_MODE (type
);
3401 if (TARGET_AAPCS_BASED
)
3402 return aapcs_allocate_return_reg (mode
, type
, func
);
3404 /* Promote integer types. */
3405 if (INTEGRAL_TYPE_P (type
))
3406 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
3408 /* Promotes small structs returned in a register to full-word size
3409 for big-endian AAPCS. */
3410 if (arm_return_in_msb (type
))
3412 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3413 if (size
% UNITS_PER_WORD
!= 0)
3415 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
3416 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
3420 return LIBCALL_VALUE (mode
);
3424 libcall_eq (const void *p1
, const void *p2
)
3426 return rtx_equal_p ((const_rtx
) p1
, (const_rtx
) p2
);
3430 libcall_hash (const void *p1
)
3432 return hash_rtx ((const_rtx
) p1
, VOIDmode
, NULL
, NULL
, FALSE
);
3436 add_libcall (htab_t htab
, rtx libcall
)
3438 *htab_find_slot (htab
, libcall
, INSERT
) = libcall
;
3442 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3444 static bool init_done
= false;
3445 static htab_t libcall_htab
;
3451 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3453 add_libcall (libcall_htab
,
3454 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3455 add_libcall (libcall_htab
,
3456 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3457 add_libcall (libcall_htab
,
3458 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3459 add_libcall (libcall_htab
,
3460 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3462 add_libcall (libcall_htab
,
3463 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3464 add_libcall (libcall_htab
,
3465 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3466 add_libcall (libcall_htab
,
3467 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3468 add_libcall (libcall_htab
,
3469 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3471 add_libcall (libcall_htab
,
3472 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3473 add_libcall (libcall_htab
,
3474 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3475 add_libcall (libcall_htab
,
3476 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3477 add_libcall (libcall_htab
,
3478 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3479 add_libcall (libcall_htab
,
3480 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3481 add_libcall (libcall_htab
,
3482 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3485 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
3489 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3491 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3492 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3494 /* The following libcalls return their result in integer registers,
3495 even though they return a floating point value. */
3496 if (arm_libcall_uses_aapcs_base (libcall
))
3497 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3501 return LIBCALL_VALUE (mode
);
3504 /* Determine the amount of memory needed to store the possible return
3505 registers of an untyped call. */
3507 arm_apply_result_size (void)
3513 if (TARGET_HARD_FLOAT_ABI
)
3519 if (TARGET_MAVERICK
)
3522 if (TARGET_IWMMXT_ABI
)
3529 /* Decide whether TYPE should be returned in memory (true)
3530 or in a register (false). FNTYPE is the type of the function making
3533 arm_return_in_memory (const_tree type
, const_tree fntype
)
3537 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3539 if (TARGET_AAPCS_BASED
)
3541 /* Simple, non-aggregate types (ie not including vectors and
3542 complex) are always returned in a register (or registers).
3543 We don't care about which register here, so we can short-cut
3544 some of the detail. */
3545 if (!AGGREGATE_TYPE_P (type
)
3546 && TREE_CODE (type
) != VECTOR_TYPE
3547 && TREE_CODE (type
) != COMPLEX_TYPE
)
3550 /* Any return value that is no larger than one word can be
3552 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3555 /* Check any available co-processors to see if they accept the
3556 type as a register candidate (VFP, for example, can return
3557 some aggregates in consecutive registers). These aren't
3558 available if the call is variadic. */
3559 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3562 /* Vector values should be returned using ARM registers, not
3563 memory (unless they're over 16 bytes, which will break since
3564 we only have four call-clobbered registers to play with). */
3565 if (TREE_CODE (type
) == VECTOR_TYPE
)
3566 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3568 /* The rest go in memory. */
3572 if (TREE_CODE (type
) == VECTOR_TYPE
)
3573 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3575 if (!AGGREGATE_TYPE_P (type
) &&
3576 (TREE_CODE (type
) != VECTOR_TYPE
))
3577 /* All simple types are returned in registers. */
3580 if (arm_abi
!= ARM_ABI_APCS
)
3582 /* ATPCS and later return aggregate types in memory only if they are
3583 larger than a word (or are variable size). */
3584 return (size
< 0 || size
> UNITS_PER_WORD
);
3587 /* For the arm-wince targets we choose to be compatible with Microsoft's
3588 ARM and Thumb compilers, which always return aggregates in memory. */
3590 /* All structures/unions bigger than one word are returned in memory.
3591 Also catch the case where int_size_in_bytes returns -1. In this case
3592 the aggregate is either huge or of variable size, and in either case
3593 we will want to return it via memory and not in a register. */
3594 if (size
< 0 || size
> UNITS_PER_WORD
)
3597 if (TREE_CODE (type
) == RECORD_TYPE
)
3601 /* For a struct the APCS says that we only return in a register
3602 if the type is 'integer like' and every addressable element
3603 has an offset of zero. For practical purposes this means
3604 that the structure can have at most one non bit-field element
3605 and that this element must be the first one in the structure. */
3607 /* Find the first field, ignoring non FIELD_DECL things which will
3608 have been created by C++. */
3609 for (field
= TYPE_FIELDS (type
);
3610 field
&& TREE_CODE (field
) != FIELD_DECL
;
3611 field
= DECL_CHAIN (field
))
3615 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3617 /* Check that the first field is valid for returning in a register. */
3619 /* ... Floats are not allowed */
3620 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3623 /* ... Aggregates that are not themselves valid for returning in
3624 a register are not allowed. */
3625 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3628 /* Now check the remaining fields, if any. Only bitfields are allowed,
3629 since they are not addressable. */
3630 for (field
= DECL_CHAIN (field
);
3632 field
= DECL_CHAIN (field
))
3634 if (TREE_CODE (field
) != FIELD_DECL
)
3637 if (!DECL_BIT_FIELD_TYPE (field
))
3644 if (TREE_CODE (type
) == UNION_TYPE
)
3648 /* Unions can be returned in registers if every element is
3649 integral, or can be returned in an integer register. */
3650 for (field
= TYPE_FIELDS (type
);
3652 field
= DECL_CHAIN (field
))
3654 if (TREE_CODE (field
) != FIELD_DECL
)
3657 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3660 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3666 #endif /* not ARM_WINCE */
3668 /* Return all other types in memory. */
3672 /* Indicate whether or not words of a double are in big-endian order. */
3675 arm_float_words_big_endian (void)
3677 if (TARGET_MAVERICK
)
3680 /* For FPA, float words are always big-endian. For VFP, floats words
3681 follow the memory system mode. */
3689 return (TARGET_BIG_END
? 1 : 0);
3694 const struct pcs_attribute_arg
3698 } pcs_attribute_args
[] =
3700 {"aapcs", ARM_PCS_AAPCS
},
3701 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
3703 /* We could recognize these, but changes would be needed elsewhere
3704 * to implement them. */
3705 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
3706 {"atpcs", ARM_PCS_ATPCS
},
3707 {"apcs", ARM_PCS_APCS
},
3709 {NULL
, ARM_PCS_UNKNOWN
}
3713 arm_pcs_from_attribute (tree attr
)
3715 const struct pcs_attribute_arg
*ptr
;
3718 /* Get the value of the argument. */
3719 if (TREE_VALUE (attr
) == NULL_TREE
3720 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
3721 return ARM_PCS_UNKNOWN
;
3723 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
3725 /* Check it against the list of known arguments. */
3726 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3727 if (streq (arg
, ptr
->arg
))
3730 /* An unrecognized interrupt type. */
3731 return ARM_PCS_UNKNOWN
;
3734 /* Get the PCS variant to use for this call. TYPE is the function's type
3735 specification, DECL is the specific declartion. DECL may be null if
3736 the call could be indirect or if this is a library call. */
3738 arm_get_pcs_model (const_tree type
, const_tree decl
)
3740 bool user_convention
= false;
3741 enum arm_pcs user_pcs
= arm_pcs_default
;
3746 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
3749 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
3750 user_convention
= true;
3753 if (TARGET_AAPCS_BASED
)
3755 /* Detect varargs functions. These always use the base rules
3756 (no argument is ever a candidate for a co-processor
3758 bool base_rules
= stdarg_p (type
);
3760 if (user_convention
)
3762 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
3763 sorry ("Non-AAPCS derived PCS variant");
3764 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
3765 error ("Variadic functions must use the base AAPCS variant");
3769 return ARM_PCS_AAPCS
;
3770 else if (user_convention
)
3772 else if (decl
&& flag_unit_at_a_time
)
3774 /* Local functions never leak outside this compilation unit,
3775 so we are free to use whatever conventions are
3777 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3778 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3780 return ARM_PCS_AAPCS_LOCAL
;
3783 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
3784 sorry ("PCS variant");
3786 /* For everything else we use the target's default. */
3787 return arm_pcs_default
;
3792 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3793 const_tree fntype ATTRIBUTE_UNUSED
,
3794 rtx libcall ATTRIBUTE_UNUSED
,
3795 const_tree fndecl ATTRIBUTE_UNUSED
)
3797 /* Record the unallocated VFP registers. */
3798 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
3799 pcum
->aapcs_vfp_reg_alloc
= 0;
3802 /* Walk down the type tree of TYPE counting consecutive base elements.
3803 If *MODEP is VOIDmode, then set it to the first valid floating point
3804 type. If a non-floating point type is found, or if a floating point
3805 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3806 otherwise return the count in the sub-tree. */
3808 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
3810 enum machine_mode mode
;
3813 switch (TREE_CODE (type
))
3816 mode
= TYPE_MODE (type
);
3817 if (mode
!= DFmode
&& mode
!= SFmode
)
3820 if (*modep
== VOIDmode
)
3829 mode
= TYPE_MODE (TREE_TYPE (type
));
3830 if (mode
!= DFmode
&& mode
!= SFmode
)
3833 if (*modep
== VOIDmode
)
3842 /* Use V2SImode and V4SImode as representatives of all 64-bit
3843 and 128-bit vector types, whether or not those modes are
3844 supported with the present options. */
3845 size
= int_size_in_bytes (type
);
3858 if (*modep
== VOIDmode
)
3861 /* Vector modes are considered to be opaque: two vectors are
3862 equivalent for the purposes of being homogeneous aggregates
3863 if they are the same size. */
3872 tree index
= TYPE_DOMAIN (type
);
3874 /* Can't handle incomplete types. */
3875 if (!COMPLETE_TYPE_P(type
))
3878 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
3881 || !TYPE_MAX_VALUE (index
)
3882 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
3883 || !TYPE_MIN_VALUE (index
)
3884 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
3888 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
3889 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
3891 /* There must be no padding. */
3892 if (!host_integerp (TYPE_SIZE (type
), 1)
3893 || (tree_low_cst (TYPE_SIZE (type
), 1)
3894 != count
* GET_MODE_BITSIZE (*modep
)))
3906 /* Can't handle incomplete types. */
3907 if (!COMPLETE_TYPE_P(type
))
3910 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3912 if (TREE_CODE (field
) != FIELD_DECL
)
3915 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3921 /* There must be no padding. */
3922 if (!host_integerp (TYPE_SIZE (type
), 1)
3923 || (tree_low_cst (TYPE_SIZE (type
), 1)
3924 != count
* GET_MODE_BITSIZE (*modep
)))
3931 case QUAL_UNION_TYPE
:
3933 /* These aren't very interesting except in a degenerate case. */
3938 /* Can't handle incomplete types. */
3939 if (!COMPLETE_TYPE_P(type
))
3942 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3944 if (TREE_CODE (field
) != FIELD_DECL
)
3947 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3950 count
= count
> sub_count
? count
: sub_count
;
3953 /* There must be no padding. */
3954 if (!host_integerp (TYPE_SIZE (type
), 1)
3955 || (tree_low_cst (TYPE_SIZE (type
), 1)
3956 != count
* GET_MODE_BITSIZE (*modep
)))
3969 /* Return true if PCS_VARIANT should use VFP registers. */
3971 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
3973 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
3975 static bool seen_thumb1_vfp
= false;
3977 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
3979 sorry ("Thumb-1 hard-float VFP ABI");
3980 /* sorry() is not immediately fatal, so only display this once. */
3981 seen_thumb1_vfp
= true;
3987 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
3990 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
3991 (TARGET_VFP_DOUBLE
|| !is_double
));
3995 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
3996 enum machine_mode mode
, const_tree type
,
3997 enum machine_mode
*base_mode
, int *count
)
3999 enum machine_mode new_mode
= VOIDmode
;
4001 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
4002 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
4003 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
4008 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
4011 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
4013 else if (type
&& (mode
== BLKmode
|| TREE_CODE (type
) == VECTOR_TYPE
))
4015 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
4017 if (ag_count
> 0 && ag_count
<= 4)
4026 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
4029 *base_mode
= new_mode
;
4034 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
4035 enum machine_mode mode
, const_tree type
)
4037 int count ATTRIBUTE_UNUSED
;
4038 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
4040 if (!use_vfp_abi (pcs_variant
, false))
4042 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4047 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4050 if (!use_vfp_abi (pcum
->pcs_variant
, false))
4053 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
4054 &pcum
->aapcs_vfp_rmode
,
4055 &pcum
->aapcs_vfp_rcount
);
4059 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4060 const_tree type ATTRIBUTE_UNUSED
)
4062 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
4063 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
4066 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
4067 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
4069 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
4070 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4073 int rcount
= pcum
->aapcs_vfp_rcount
;
4075 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
4079 /* Avoid using unsupported vector modes. */
4080 if (rmode
== V2SImode
)
4082 else if (rmode
== V4SImode
)
4089 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
4090 for (i
= 0; i
< rcount
; i
++)
4092 rtx tmp
= gen_rtx_REG (rmode
,
4093 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
4094 tmp
= gen_rtx_EXPR_LIST
4096 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
4097 XVECEXP (par
, 0, i
) = tmp
;
4100 pcum
->aapcs_reg
= par
;
4103 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
4110 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
4111 enum machine_mode mode
,
4112 const_tree type ATTRIBUTE_UNUSED
)
4114 if (!use_vfp_abi (pcs_variant
, false))
4117 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4120 enum machine_mode ag_mode
;
4125 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4130 if (ag_mode
== V2SImode
)
4132 else if (ag_mode
== V4SImode
)
4138 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4139 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4140 for (i
= 0; i
< count
; i
++)
4142 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4143 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4144 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4145 XVECEXP (par
, 0, i
) = tmp
;
4151 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4155 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4156 enum machine_mode mode ATTRIBUTE_UNUSED
,
4157 const_tree type ATTRIBUTE_UNUSED
)
4159 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4160 pcum
->aapcs_vfp_reg_alloc
= 0;
4164 #define AAPCS_CP(X) \
4166 aapcs_ ## X ## _cum_init, \
4167 aapcs_ ## X ## _is_call_candidate, \
4168 aapcs_ ## X ## _allocate, \
4169 aapcs_ ## X ## _is_return_candidate, \
4170 aapcs_ ## X ## _allocate_return_reg, \
4171 aapcs_ ## X ## _advance \
4174 /* Table of co-processors that can be used to pass arguments in
4175 registers. Idealy no arugment should be a candidate for more than
4176 one co-processor table entry, but the table is processed in order
4177 and stops after the first match. If that entry then fails to put
4178 the argument into a co-processor register, the argument will go on
4182 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4183 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4185 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4186 BLKmode) is a candidate for this co-processor's registers; this
4187 function should ignore any position-dependent state in
4188 CUMULATIVE_ARGS and only use call-type dependent information. */
4189 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4191 /* Return true if the argument does get a co-processor register; it
4192 should set aapcs_reg to an RTX of the register allocated as is
4193 required for a return from FUNCTION_ARG. */
4194 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4196 /* Return true if a result of mode MODE (or type TYPE if MODE is
4197 BLKmode) is can be returned in this co-processor's registers. */
4198 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4200 /* Allocate and return an RTX element to hold the return type of a
4201 call, this routine must not fail and will only be called if
4202 is_return_candidate returned true with the same parameters. */
4203 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4205 /* Finish processing this argument and prepare to start processing
4207 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4208 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4216 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4221 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4222 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4229 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4231 /* We aren't passed a decl, so we can't check that a call is local.
4232 However, it isn't clear that that would be a win anyway, since it
4233 might limit some tail-calling opportunities. */
4234 enum arm_pcs pcs_variant
;
4238 const_tree fndecl
= NULL_TREE
;
4240 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4243 fntype
= TREE_TYPE (fntype
);
4246 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4249 pcs_variant
= arm_pcs_default
;
4251 if (pcs_variant
!= ARM_PCS_AAPCS
)
4255 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4256 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4265 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4268 /* We aren't passed a decl, so we can't check that a call is local.
4269 However, it isn't clear that that would be a win anyway, since it
4270 might limit some tail-calling opportunities. */
4271 enum arm_pcs pcs_variant
;
4272 int unsignedp ATTRIBUTE_UNUSED
;
4276 const_tree fndecl
= NULL_TREE
;
4278 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4281 fntype
= TREE_TYPE (fntype
);
4284 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4287 pcs_variant
= arm_pcs_default
;
4289 /* Promote integer types. */
4290 if (type
&& INTEGRAL_TYPE_P (type
))
4291 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4293 if (pcs_variant
!= ARM_PCS_AAPCS
)
4297 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4298 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4300 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4304 /* Promotes small structs returned in a register to full-word size
4305 for big-endian AAPCS. */
4306 if (type
&& arm_return_in_msb (type
))
4308 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4309 if (size
% UNITS_PER_WORD
!= 0)
4311 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4312 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4316 return gen_rtx_REG (mode
, R0_REGNUM
);
4320 aapcs_libcall_value (enum machine_mode mode
)
4322 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4325 /* Lay out a function argument using the AAPCS rules. The rule
4326 numbers referred to here are those in the AAPCS. */
4328 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4329 const_tree type
, bool named
)
4334 /* We only need to do this once per argument. */
4335 if (pcum
->aapcs_arg_processed
)
4338 pcum
->aapcs_arg_processed
= true;
4340 /* Special case: if named is false then we are handling an incoming
4341 anonymous argument which is on the stack. */
4345 /* Is this a potential co-processor register candidate? */
4346 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4348 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4349 pcum
->aapcs_cprc_slot
= slot
;
4351 /* We don't have to apply any of the rules from part B of the
4352 preparation phase, these are handled elsewhere in the
4357 /* A Co-processor register candidate goes either in its own
4358 class of registers or on the stack. */
4359 if (!pcum
->aapcs_cprc_failed
[slot
])
4361 /* C1.cp - Try to allocate the argument to co-processor
4363 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4366 /* C2.cp - Put the argument on the stack and note that we
4367 can't assign any more candidates in this slot. We also
4368 need to note that we have allocated stack space, so that
4369 we won't later try to split a non-cprc candidate between
4370 core registers and the stack. */
4371 pcum
->aapcs_cprc_failed
[slot
] = true;
4372 pcum
->can_split
= false;
4375 /* We didn't get a register, so this argument goes on the
4377 gcc_assert (pcum
->can_split
== false);
4382 /* C3 - For double-word aligned arguments, round the NCRN up to the
4383 next even number. */
4384 ncrn
= pcum
->aapcs_ncrn
;
4385 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4388 nregs
= ARM_NUM_REGS2(mode
, type
);
4390 /* Sigh, this test should really assert that nregs > 0, but a GCC
4391 extension allows empty structs and then gives them empty size; it
4392 then allows such a structure to be passed by value. For some of
4393 the code below we have to pretend that such an argument has
4394 non-zero size so that we 'locate' it correctly either in
4395 registers or on the stack. */
4396 gcc_assert (nregs
>= 0);
4398 nregs2
= nregs
? nregs
: 1;
4400 /* C4 - Argument fits entirely in core registers. */
4401 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4403 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4404 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4408 /* C5 - Some core registers left and there are no arguments already
4409 on the stack: split this argument between the remaining core
4410 registers and the stack. */
4411 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4413 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4414 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4415 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4419 /* C6 - NCRN is set to 4. */
4420 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4422 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4426 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4427 for a call to a function whose data type is FNTYPE.
4428 For a library call, FNTYPE is NULL. */
4430 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4432 tree fndecl ATTRIBUTE_UNUSED
)
4434 /* Long call handling. */
4436 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4438 pcum
->pcs_variant
= arm_pcs_default
;
4440 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4442 if (arm_libcall_uses_aapcs_base (libname
))
4443 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4445 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4446 pcum
->aapcs_reg
= NULL_RTX
;
4447 pcum
->aapcs_partial
= 0;
4448 pcum
->aapcs_arg_processed
= false;
4449 pcum
->aapcs_cprc_slot
= -1;
4450 pcum
->can_split
= true;
4452 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4456 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4458 pcum
->aapcs_cprc_failed
[i
] = false;
4459 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4467 /* On the ARM, the offset starts at 0. */
4469 pcum
->iwmmxt_nregs
= 0;
4470 pcum
->can_split
= true;
4472 /* Varargs vectors are treated the same as long long.
4473 named_count avoids having to change the way arm handles 'named' */
4474 pcum
->named_count
= 0;
4477 if (TARGET_REALLY_IWMMXT
&& fntype
)
4481 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4483 fn_arg
= TREE_CHAIN (fn_arg
))
4484 pcum
->named_count
+= 1;
4486 if (! pcum
->named_count
)
4487 pcum
->named_count
= INT_MAX
;
4492 /* Return true if mode/type need doubleword alignment. */
4494 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
4496 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
4497 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
4501 /* Determine where to put an argument to a function.
4502 Value is zero to push the argument on the stack,
4503 or a hard register in which to store the argument.
4505 MODE is the argument's machine mode.
4506 TYPE is the data type of the argument (as a tree).
4507 This is null for libcalls where that information may
4509 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4510 the preceding args and about the function being called.
4511 NAMED is nonzero if this argument is a named parameter
4512 (otherwise it is an extra parameter matching an ellipsis).
4514 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4515 other arguments are passed on the stack. If (NAMED == 0) (which happens
4516 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4517 defined), say it is passed in the stack (function_prologue will
4518 indeed make it pass in the stack if necessary). */
4521 arm_function_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4522 const_tree type
, bool named
)
4526 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4527 a call insn (op3 of a call_value insn). */
4528 if (mode
== VOIDmode
)
4531 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4533 aapcs_layout_arg (pcum
, mode
, type
, named
);
4534 return pcum
->aapcs_reg
;
4537 /* Varargs vectors are treated the same as long long.
4538 named_count avoids having to change the way arm handles 'named' */
4539 if (TARGET_IWMMXT_ABI
4540 && arm_vector_mode_supported_p (mode
)
4541 && pcum
->named_count
> pcum
->nargs
+ 1)
4543 if (pcum
->iwmmxt_nregs
<= 9)
4544 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4547 pcum
->can_split
= false;
4552 /* Put doubleword aligned quantities in even register pairs. */
4554 && ARM_DOUBLEWORD_ALIGN
4555 && arm_needs_doubleword_align (mode
, type
))
4558 /* Only allow splitting an arg between regs and memory if all preceding
4559 args were allocated to regs. For args passed by reference we only count
4560 the reference pointer. */
4561 if (pcum
->can_split
)
4564 nregs
= ARM_NUM_REGS2 (mode
, type
);
4566 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4569 return gen_rtx_REG (mode
, pcum
->nregs
);
4573 arm_arg_partial_bytes (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4574 tree type
, bool named
)
4576 int nregs
= pcum
->nregs
;
4578 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4580 aapcs_layout_arg (pcum
, mode
, type
, named
);
4581 return pcum
->aapcs_partial
;
4584 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4587 if (NUM_ARG_REGS
> nregs
4588 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4590 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4595 /* Update the data in PCUM to advance over an argument
4596 of mode MODE and data type TYPE.
4597 (TYPE is null for libcalls where that information may not be available.) */
4600 arm_function_arg_advance (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4601 const_tree type
, bool named
)
4603 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4605 aapcs_layout_arg (pcum
, mode
, type
, named
);
4607 if (pcum
->aapcs_cprc_slot
>= 0)
4609 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4611 pcum
->aapcs_cprc_slot
= -1;
4614 /* Generic stuff. */
4615 pcum
->aapcs_arg_processed
= false;
4616 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4617 pcum
->aapcs_reg
= NULL_RTX
;
4618 pcum
->aapcs_partial
= 0;
4623 if (arm_vector_mode_supported_p (mode
)
4624 && pcum
->named_count
> pcum
->nargs
4625 && TARGET_IWMMXT_ABI
)
4626 pcum
->iwmmxt_nregs
+= 1;
4628 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
4632 /* Variable sized types are passed by reference. This is a GCC
4633 extension to the ARM ABI. */
4636 arm_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
4637 enum machine_mode mode ATTRIBUTE_UNUSED
,
4638 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4640 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

/* The state most recently set by one of the pragma handlers below.  */
static arm_pragma_enum arm_pragma_long_calls = OFF;
4654 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4656 arm_pragma_long_calls
= LONG
;
4660 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4662 arm_pragma_long_calls
= SHORT
;
4666 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4668 arm_pragma_long_calls
= OFF
;
4671 /* Handle an attribute requiring a FUNCTION_DECL;
4672 arguments as in struct attribute_spec.handler. */
4674 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4675 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4677 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4679 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4681 *no_add_attrs
= true;
4687 /* Handle an "interrupt" or "isr" attribute;
4688 arguments as in struct attribute_spec.handler. */
4690 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
4695 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4697 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4699 *no_add_attrs
= true;
4701 /* FIXME: the argument if any is checked for type attributes;
4702 should it be checked for decl ones? */
4706 if (TREE_CODE (*node
) == FUNCTION_TYPE
4707 || TREE_CODE (*node
) == METHOD_TYPE
)
4709 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
4711 warning (OPT_Wattributes
, "%qE attribute ignored",
4713 *no_add_attrs
= true;
4716 else if (TREE_CODE (*node
) == POINTER_TYPE
4717 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
4718 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
4719 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
4721 *node
= build_variant_type_copy (*node
);
4722 TREE_TYPE (*node
) = build_type_attribute_variant
4724 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
4725 *no_add_attrs
= true;
4729 /* Possibly pass this attribute on from the type to a decl. */
4730 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
4731 | (int) ATTR_FLAG_FUNCTION_NEXT
4732 | (int) ATTR_FLAG_ARRAY_NEXT
))
4734 *no_add_attrs
= true;
4735 return tree_cons (name
, args
, NULL_TREE
);
4739 warning (OPT_Wattributes
, "%qE attribute ignored",
4748 /* Handle a "pcs" attribute; arguments as in struct
4749 attribute_spec.handler. */
4751 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
4752 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4754 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
4756 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
4757 *no_add_attrs
= true;
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */
static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      /* Hidden visibility carries the same meaning as "notshared".  */
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
4787 /* Return 0 if the attributes for two types are incompatible, 1 if they
4788 are compatible, and 2 if they are nearly compatible (which causes a
4789 warning to be generated). */
4791 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
4795 /* Check for mismatch of non-default calling convention. */
4796 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
4799 /* Check for mismatched call attributes. */
4800 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4801 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4802 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4803 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4805 /* Only bother to check if an attribute is defined. */
4806 if (l1
| l2
| s1
| s2
)
4808 /* If one type has an attribute, the other must have the same attribute. */
4809 if ((l1
!= l2
) || (s1
!= s2
))
4812 /* Disallow mixed attributes. */
4813 if ((l1
& s2
) || (l2
& s1
))
4817 /* Check for mismatched ISR attribute. */
4818 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
4820 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
4821 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
4823 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
4830 /* Assigns default attributes to newly defined type. This is used to
4831 set short_call/long_call attributes for function types of
4832 functions defined inside corresponding #pragma scopes. */
4834 arm_set_default_type_attributes (tree type
)
4836 /* Add __attribute__ ((long_call)) to all functions, when
4837 inside #pragma long_calls or __attribute__ ((short_call)),
4838 when inside #pragma no_long_calls. */
4839 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
4841 tree type_attr_list
, attr_name
;
4842 type_attr_list
= TYPE_ATTRIBUTES (type
);
4844 if (arm_pragma_long_calls
== LONG
)
4845 attr_name
= get_identifier ("long_call");
4846 else if (arm_pragma_long_calls
== SHORT
)
4847 attr_name
= get_identifier ("short_call");
4851 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
4852 TYPE_ATTRIBUTES (type
) = type_attr_list
;
4856 /* Return true if DECL is known to be linked into section SECTION. */
4859 arm_function_in_section_p (tree decl
, section
*section
)
4861 /* We can only be certain about functions defined in the same
4862 compilation unit. */
4863 if (!TREE_STATIC (decl
))
4866 /* Make sure that SYMBOL always binds to the definition in this
4867 compilation unit. */
4868 if (!targetm
.binds_local_p (decl
))
4871 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4872 if (!DECL_SECTION_NAME (decl
))
4874 /* Make sure that we will not create a unique section for DECL. */
4875 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
4879 return function_section (decl
) == section
;
4882 /* Return nonzero if a 32-bit "long_call" should be generated for
4883 a call from the current function to DECL. We generate a long_call
4886 a. has an __attribute__((long call))
4887 or b. is within the scope of a #pragma long_calls
4888 or c. the -mlong-calls command line switch has been specified
4890 However we do not generate a long call if the function:
4892 d. has an __attribute__ ((short_call))
4893 or e. is inside the scope of a #pragma no_long_calls
4894 or f. is defined in the same section as the current function. */
4897 arm_is_long_call_p (tree decl
)
4902 return TARGET_LONG_CALLS
;
4904 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
4905 if (lookup_attribute ("short_call", attrs
))
4908 /* For "f", be conservative, and only cater for cases in which the
4909 whole of the current function is placed in the same section. */
4910 if (!flag_reorder_blocks_and_partition
4911 && TREE_CODE (decl
) == FUNCTION_DECL
4912 && arm_function_in_section_p (decl
, current_function_section ()))
4915 if (lookup_attribute ("long_call", attrs
))
4918 return TARGET_LONG_CALLS
;
4921 /* Return nonzero if it is ok to make a tail-call to DECL. */
4923 arm_function_ok_for_sibcall (tree decl
, tree exp
)
4925 unsigned long func_type
;
4927 if (cfun
->machine
->sibcall_blocked
)
4930 /* Never tailcall something for which we have no decl, or if we
4931 are generating code for Thumb-1. */
4932 if (decl
== NULL
|| TARGET_THUMB1
)
4935 /* The PIC register is live on entry to VxWorks PLT entries, so we
4936 must make the call before restoring the PIC register. */
4937 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
4940 /* Cannot tail-call to long calls, since these are out of range of
4941 a branch instruction. */
4942 if (arm_is_long_call_p (decl
))
4945 /* If we are interworking and the function is not declared static
4946 then we can't tail-call it unless we know that it exists in this
4947 compilation unit (since it might be a Thumb routine). */
4948 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
4951 func_type
= arm_current_func_type ();
4952 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4953 if (IS_INTERRUPT (func_type
))
4956 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4958 /* Check that the return value locations are the same. For
4959 example that we aren't returning a value from the sibling in
4960 a VFP register but then need to transfer it to a core
4964 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
4965 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4967 if (!rtx_equal_p (a
, b
))
4971 /* Never tailcall if function may be called with a misaligned SP. */
4972 if (IS_STACKALIGN (func_type
))
4975 /* Everything else is ok. */
4980 /* Addressing mode support functions. */
4982 /* Return nonzero if X is a legitimate immediate operand when compiling
4983 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4985 legitimate_pic_operand_p (rtx x
)
4987 if (GET_CODE (x
) == SYMBOL_REF
4988 || (GET_CODE (x
) == CONST
4989 && GET_CODE (XEXP (x
, 0)) == PLUS
4990 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
4996 /* Record that the current function needs a PIC register. Initialize
4997 cfun->machine->pic_reg if we have not already done so. */
5000 require_pic_register (void)
5002 /* A lot of the logic here is made obscure by the fact that this
5003 routine gets called as part of the rtx cost estimation process.
5004 We don't want those calls to affect any assumptions about the real
5005 function; and further, we can't call entry_of_function() until we
5006 start the real expansion process. */
5007 if (!crtl
->uses_pic_offset_table
)
5009 gcc_assert (can_create_pseudo_p ());
5010 if (arm_pic_register
!= INVALID_REGNUM
)
5012 if (!cfun
->machine
->pic_reg
)
5013 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
5015 /* Play games to avoid marking the function as needing pic
5016 if we are being called as part of the cost-estimation
5018 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5019 crtl
->uses_pic_offset_table
= 1;
5025 if (!cfun
->machine
->pic_reg
)
5026 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
5028 /* Play games to avoid marking the function as needing pic
5029 if we are being called as part of the cost-estimation
5031 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5033 crtl
->uses_pic_offset_table
= 1;
5036 arm_load_pic_register (0UL);
5040 /* We can be called during expansion of PHI nodes, where
5041 we can't yet emit instructions directly in the final
5042 insn stream. Queue the insns on the entry edge, they will
5043 be committed after everything else is expanded. */
5044 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
5051 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
5053 if (GET_CODE (orig
) == SYMBOL_REF
5054 || GET_CODE (orig
) == LABEL_REF
)
5060 gcc_assert (can_create_pseudo_p ());
5061 reg
= gen_reg_rtx (Pmode
);
5064 /* VxWorks does not impose a fixed gap between segments; the run-time
5065 gap can be different from the object-file gap. We therefore can't
5066 use GOTOFF unless we are absolutely sure that the symbol is in the
5067 same segment as the GOT. Unfortunately, the flexibility of linker
5068 scripts means that we can't be sure of that in general, so assume
5069 that GOTOFF is never valid on VxWorks. */
5070 if ((GET_CODE (orig
) == LABEL_REF
5071 || (GET_CODE (orig
) == SYMBOL_REF
&&
5072 SYMBOL_REF_LOCAL_P (orig
)))
5074 && !TARGET_VXWORKS_RTP
)
5075 insn
= arm_pic_static_addr (orig
, reg
);
5081 /* If this function doesn't have a pic register, create one now. */
5082 require_pic_register ();
5084 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
5086 /* Make the MEM as close to a constant as possible. */
5087 mem
= SET_SRC (pat
);
5088 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
5089 MEM_READONLY_P (mem
) = 1;
5090 MEM_NOTRAP_P (mem
) = 1;
5092 insn
= emit_insn (pat
);
5095 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5097 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
5101 else if (GET_CODE (orig
) == CONST
)
5105 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5106 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
5109 /* Handle the case where we have: const (UNSPEC_TLS). */
5110 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
5111 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
5114 /* Handle the case where we have:
5115 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5117 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5118 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
5119 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
5121 gcc_assert (GET_CODE (XEXP (XEXP (orig
, 0), 1)) == CONST_INT
);
5127 gcc_assert (can_create_pseudo_p ());
5128 reg
= gen_reg_rtx (Pmode
);
5131 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
5133 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5134 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5135 base
== reg
? 0 : reg
);
5137 if (GET_CODE (offset
) == CONST_INT
)
5139 /* The base register doesn't really matter, we only want to
5140 test the index for the appropriate mode. */
5141 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5143 gcc_assert (can_create_pseudo_p ());
5144 offset
= force_reg (Pmode
, offset
);
5147 if (GET_CODE (offset
) == CONST_INT
)
5148 return plus_constant (base
, INTVAL (offset
));
5151 if (GET_MODE_SIZE (mode
) > 4
5152 && (GET_MODE_CLASS (mode
) == MODE_INT
5153 || TARGET_SOFT_FLOAT
))
5155 emit_insn (gen_addsi3 (reg
, base
, offset
));
5159 return gen_rtx_PLUS (Pmode
, base
, offset
);
5166 /* Find a spare register to use during the prolog of a function. */
5169 thumb_find_work_register (unsigned long pushed_regs_mask
)
5173 /* Check the argument registers first as these are call-used. The
5174 register allocation order means that sometimes r3 might be used
5175 but earlier argument registers might not, so check them all. */
5176 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5177 if (!df_regs_ever_live_p (reg
))
5180 /* Before going on to check the call-saved registers we can try a couple
5181 more ways of deducing that r3 is available. The first is when we are
5182 pushing anonymous arguments onto the stack and we have less than 4
5183 registers worth of fixed arguments(*). In this case r3 will be part of
5184 the variable argument list and so we can be sure that it will be
5185 pushed right at the start of the function. Hence it will be available
5186 for the rest of the prologue.
5187 (*): ie crtl->args.pretend_args_size is greater than 0. */
5188 if (cfun
->machine
->uses_anonymous_args
5189 && crtl
->args
.pretend_args_size
> 0)
5190 return LAST_ARG_REGNUM
;
5192 /* The other case is when we have fixed arguments but less than 4 registers
5193 worth. In this case r3 might be used in the body of the function, but
5194 it is not being used to convey an argument into the function. In theory
5195 we could just check crtl->args.size to see how many bytes are
5196 being passed in argument registers, but it seems that it is unreliable.
5197 Sometimes it will have the value 0 when in fact arguments are being
5198 passed. (See testcase execute/20021111-1.c for an example). So we also
5199 check the args_info.nregs field as well. The problem with this field is
5200 that it makes no allowances for arguments that are passed to the
5201 function but which are not used. Hence we could miss an opportunity
5202 when a function has an unused argument in r3. But it is better to be
5203 safe than to be sorry. */
5204 if (! cfun
->machine
->uses_anonymous_args
5205 && crtl
->args
.size
>= 0
5206 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5207 && crtl
->args
.info
.nregs
< 4)
5208 return LAST_ARG_REGNUM
;
5210 /* Otherwise look for a call-saved register that is going to be pushed. */
5211 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5212 if (pushed_regs_mask
& (1 << reg
))
5217 /* Thumb-2 can use high regs. */
5218 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5219 if (pushed_regs_mask
& (1 << reg
))
5222 /* Something went wrong - thumb_compute_save_reg_mask()
5223 should have arranged for a suitable register to be pushed. */
5227 static GTY(()) int pic_labelno
;
5229 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5233 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5235 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5237 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5240 gcc_assert (flag_pic
);
5242 pic_reg
= cfun
->machine
->pic_reg
;
5243 if (TARGET_VXWORKS_RTP
)
5245 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5246 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5247 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5249 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5251 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5252 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5256 /* We use an UNSPEC rather than a LABEL_REF because this label
5257 never appears in the code stream. */
5259 labelno
= GEN_INT (pic_labelno
++);
5260 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5261 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5263 /* On the ARM the PC register contains 'dot + 8' at the time of the
5264 addition, on the Thumb it is 'dot + 4'. */
5265 pic_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5266 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5268 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5272 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5274 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
5276 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5278 else /* TARGET_THUMB1 */
5280 if (arm_pic_register
!= INVALID_REGNUM
5281 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5283 /* We will have pushed the pic register, so we should always be
5284 able to find a work register. */
5285 pic_tmp
= gen_rtx_REG (SImode
,
5286 thumb_find_work_register (saved_regs
));
5287 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5288 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5291 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
5292 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5296 /* Need to emit this whether or not we obey regdecls,
5297 since setjmp/longjmp can cause life info to screw up. */
5301 /* Generate code to load the address of a static var when flag_pic is set. */
5303 arm_pic_static_addr (rtx orig
, rtx reg
)
5305 rtx l1
, labelno
, offset_rtx
, insn
;
5307 gcc_assert (flag_pic
);
5309 /* We use an UNSPEC rather than a LABEL_REF because this label
5310 never appears in the code stream. */
5311 labelno
= GEN_INT (pic_labelno
++);
5312 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5313 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5315 /* On the ARM the PC register contains 'dot + 8' at the time of the
5316 addition, on the Thumb it is 'dot + 4'. */
5317 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5318 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5319 UNSPEC_SYMBOL_OFFSET
);
5320 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5324 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5326 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5328 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5330 else /* TARGET_THUMB1 */
5332 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5333 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5339 /* Return nonzero if X is valid as an ARM state addressing register. */
5341 arm_address_register_rtx_p (rtx x
, int strict_p
)
5345 if (GET_CODE (x
) != REG
)
5351 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5353 return (regno
<= LAST_ARM_REGNUM
5354 || regno
>= FIRST_PSEUDO_REGISTER
5355 || regno
== FRAME_POINTER_REGNUM
5356 || regno
== ARG_POINTER_REGNUM
);
5359 /* Return TRUE if this rtx is the difference of a symbol and a label,
5360 and will reduce to a PC-relative relocation in the object file.
5361 Expressions like this can be left alone when generating PIC, rather
5362 than forced through the GOT. */
5364 pcrel_constant_p (rtx x
)
5366 if (GET_CODE (x
) == MINUS
)
5367 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5372 /* Return true if X will surely end up in an index register after next
5375 will_be_in_index_register (const_rtx x
)
5377 /* arm.md: calculate_pic_address will split this into a register. */
5378 return GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_SYM
;
5381 /* Return nonzero if X is a valid ARM state address operand. */
5383 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5387 enum rtx_code code
= GET_CODE (x
);
5389 if (arm_address_register_rtx_p (x
, strict_p
))
5392 use_ldrd
= (TARGET_LDRD
5394 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5396 if (code
== POST_INC
|| code
== PRE_DEC
5397 || ((code
== PRE_INC
|| code
== POST_DEC
)
5398 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5399 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5401 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5402 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5403 && GET_CODE (XEXP (x
, 1)) == PLUS
5404 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5406 rtx addend
= XEXP (XEXP (x
, 1), 1);
5408 /* Don't allow ldrd post increment by register because it's hard
5409 to fixup invalid register choices. */
5411 && GET_CODE (x
) == POST_MODIFY
5412 && GET_CODE (addend
) == REG
)
5415 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5416 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5419 /* After reload constants split into minipools will have addresses
5420 from a LABEL_REF. */
5421 else if (reload_completed
5422 && (code
== LABEL_REF
5424 && GET_CODE (XEXP (x
, 0)) == PLUS
5425 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5426 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5429 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5432 else if (code
== PLUS
)
5434 rtx xop0
= XEXP (x
, 0);
5435 rtx xop1
= XEXP (x
, 1);
5437 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5438 && ((GET_CODE(xop1
) == CONST_INT
5439 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5440 || (!strict_p
&& will_be_in_index_register (xop1
))))
5441 || (arm_address_register_rtx_p (xop1
, strict_p
)
5442 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5446 /* Reload currently can't handle MINUS, so disable this for now */
5447 else if (GET_CODE (x
) == MINUS
)
5449 rtx xop0
= XEXP (x
, 0);
5450 rtx xop1
= XEXP (x
, 1);
5452 return (arm_address_register_rtx_p (xop0
, strict_p
)
5453 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5457 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5458 && code
== SYMBOL_REF
5459 && CONSTANT_POOL_ADDRESS_P (x
)
5461 && symbol_mentioned_p (get_pool_constant (x
))
5462 && ! pcrel_constant_p (get_pool_constant (x
))))
5468 /* Return nonzero if X is a valid Thumb-2 address operand. */
5470 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5473 enum rtx_code code
= GET_CODE (x
);
5475 if (arm_address_register_rtx_p (x
, strict_p
))
5478 use_ldrd
= (TARGET_LDRD
5480 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5482 if (code
== POST_INC
|| code
== PRE_DEC
5483 || ((code
== PRE_INC
|| code
== POST_DEC
)
5484 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5485 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5487 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5488 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5489 && GET_CODE (XEXP (x
, 1)) == PLUS
5490 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5492 /* Thumb-2 only has autoincrement by constant. */
5493 rtx addend
= XEXP (XEXP (x
, 1), 1);
5494 HOST_WIDE_INT offset
;
5496 if (GET_CODE (addend
) != CONST_INT
)
5499 offset
= INTVAL(addend
);
5500 if (GET_MODE_SIZE (mode
) <= 4)
5501 return (offset
> -256 && offset
< 256);
5503 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5504 && (offset
& 3) == 0);
5507 /* After reload constants split into minipools will have addresses
5508 from a LABEL_REF. */
5509 else if (reload_completed
5510 && (code
== LABEL_REF
5512 && GET_CODE (XEXP (x
, 0)) == PLUS
5513 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5514 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5517 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5520 else if (code
== PLUS
)
5522 rtx xop0
= XEXP (x
, 0);
5523 rtx xop1
= XEXP (x
, 1);
5525 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5526 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
5527 || (!strict_p
&& will_be_in_index_register (xop1
))))
5528 || (arm_address_register_rtx_p (xop1
, strict_p
)
5529 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5532 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5533 && code
== SYMBOL_REF
5534 && CONSTANT_POOL_ADDRESS_P (x
)
5536 && symbol_mentioned_p (get_pool_constant (x
))
5537 && ! pcrel_constant_p (get_pool_constant (x
))))
5543 /* Return nonzero if INDEX is valid for an address index operand in
5546 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5549 HOST_WIDE_INT range
;
5550 enum rtx_code code
= GET_CODE (index
);
5552 /* Standard coprocessor addressing modes. */
5553 if (TARGET_HARD_FLOAT
5554 && (TARGET_FPA
|| TARGET_MAVERICK
)
5555 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
5556 || (TARGET_MAVERICK
&& mode
== DImode
)))
5557 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5558 && INTVAL (index
) > -1024
5559 && (INTVAL (index
) & 3) == 0);
5562 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
5563 return (code
== CONST_INT
5564 && INTVAL (index
) < 1016
5565 && INTVAL (index
) > -1024
5566 && (INTVAL (index
) & 3) == 0);
5568 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5569 return (code
== CONST_INT
5570 && INTVAL (index
) < 1024
5571 && INTVAL (index
) > -1024
5572 && (INTVAL (index
) & 3) == 0);
5574 if (arm_address_register_rtx_p (index
, strict_p
)
5575 && (GET_MODE_SIZE (mode
) <= 4))
5578 if (mode
== DImode
|| mode
== DFmode
)
5580 if (code
== CONST_INT
)
5582 HOST_WIDE_INT val
= INTVAL (index
);
5585 return val
> -256 && val
< 256;
5587 return val
> -4096 && val
< 4092;
5590 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5593 if (GET_MODE_SIZE (mode
) <= 4
5597 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5601 rtx xiop0
= XEXP (index
, 0);
5602 rtx xiop1
= XEXP (index
, 1);
5604 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5605 && power_of_two_operand (xiop1
, SImode
))
5606 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5607 && power_of_two_operand (xiop0
, SImode
)));
5609 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5610 || code
== ASHIFT
|| code
== ROTATERT
)
5612 rtx op
= XEXP (index
, 1);
5614 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5615 && GET_CODE (op
) == CONST_INT
5617 && INTVAL (op
) <= 31);
5621 /* For ARM v4 we may be doing a sign-extend operation during the
5627 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5633 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5635 return (code
== CONST_INT
5636 && INTVAL (index
) < range
5637 && INTVAL (index
) > -range
);
5640 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5641 index operand. i.e. 1, 2, 4 or 8. */
5643 thumb2_index_mul_operand (rtx op
)
5647 if (GET_CODE(op
) != CONST_INT
)
5651 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5654 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5656 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5658 enum rtx_code code
= GET_CODE (index
);
5660 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5661 /* Standard coprocessor addressing modes. */
5662 if (TARGET_HARD_FLOAT
5663 && (TARGET_FPA
|| TARGET_MAVERICK
)
5664 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
5665 || (TARGET_MAVERICK
&& mode
== DImode
)))
5666 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5667 && INTVAL (index
) > -1024
5668 && (INTVAL (index
) & 3) == 0);
5670 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5672 /* For DImode assume values will usually live in core regs
5673 and only allow LDRD addressing modes. */
5674 if (!TARGET_LDRD
|| mode
!= DImode
)
5675 return (code
== CONST_INT
5676 && INTVAL (index
) < 1024
5677 && INTVAL (index
) > -1024
5678 && (INTVAL (index
) & 3) == 0);
5682 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
5683 return (code
== CONST_INT
5684 && INTVAL (index
) < 1016
5685 && INTVAL (index
) > -1024
5686 && (INTVAL (index
) & 3) == 0);
5688 if (arm_address_register_rtx_p (index
, strict_p
)
5689 && (GET_MODE_SIZE (mode
) <= 4))
5692 if (mode
== DImode
|| mode
== DFmode
)
5694 if (code
== CONST_INT
)
5696 HOST_WIDE_INT val
= INTVAL (index
);
5697 /* ??? Can we assume ldrd for thumb2? */
5698 /* Thumb-2 ldrd only has reg+const addressing modes. */
5699 /* ldrd supports offsets of +-1020.
5700 However the ldr fallback does not. */
5701 return val
> -256 && val
< 256 && (val
& 3) == 0;
5709 rtx xiop0
= XEXP (index
, 0);
5710 rtx xiop1
= XEXP (index
, 1);
5712 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5713 && thumb2_index_mul_operand (xiop1
))
5714 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5715 && thumb2_index_mul_operand (xiop0
)));
5717 else if (code
== ASHIFT
)
5719 rtx op
= XEXP (index
, 1);
5721 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5722 && GET_CODE (op
) == CONST_INT
5724 && INTVAL (op
) <= 3);
5727 return (code
== CONST_INT
5728 && INTVAL (index
) < 4096
5729 && INTVAL (index
) > -256);
5732 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5734 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
5738 if (GET_CODE (x
) != REG
)
5744 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
5746 return (regno
<= LAST_LO_REGNUM
5747 || regno
> LAST_VIRTUAL_REGISTER
5748 || regno
== FRAME_POINTER_REGNUM
5749 || (GET_MODE_SIZE (mode
) >= 4
5750 && (regno
== STACK_POINTER_REGNUM
5751 || regno
>= FIRST_PSEUDO_REGISTER
5752 || x
== hard_frame_pointer_rtx
5753 || x
== arg_pointer_rtx
)));
5756 /* Return nonzero if x is a legitimate index register. This is the case
5757 for any base register that can access a QImode object. */
5759 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
5761 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
5764 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5766 The AP may be eliminated to either the SP or the FP, so we use the
5767 least common denominator, e.g. SImode, and offsets from 0 to 64.
5769 ??? Verify whether the above is the right approach.
5771 ??? Also, the FP may be eliminated to the SP, so perhaps that
5772 needs special handling also.
5774 ??? Look at how the mips16 port solves this problem. It probably uses
5775 better ways to solve some of these problems.
5777 Although it is not incorrect, we don't accept QImode and HImode
5778 addresses based on the frame pointer or arg pointer until the
5779 reload pass starts. This is so that eliminating such addresses
5780 into stack based ones won't produce impossible code. */
/* Predicate: return nonzero when X is a legitimate Thumb-1 memory address
   for MODE (STRICT_P selects strict register checking, i.e. post-reload).
   NOTE(review): this chunk is an extraction artifact — the embedded original
   line numbers (5782, 5784, …) skip values, so the function's return type,
   braces and the `return 0;`/`return 1;` lines between the `else if` arms
   are missing from view.  Code text left byte-identical; do not treat this
   text as compilable.  */
5782 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
/* Reject sub-word accesses through frame/arg/virtual registers before
   reload: eliminating them into SP-relative form could become impossible
   (see the comment just above this function in the original file).  */
5784 /* ??? Not clear if this is right. Experiment. */
5785 if (GET_MODE_SIZE (mode
) < 4
5786 && !(reload_in_progress
|| reload_completed
)
5787 && (reg_mentioned_p (frame_pointer_rtx
, x
)
5788 || reg_mentioned_p (arg_pointer_rtx
, x
)
5789 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
5790 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
5791 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
5792 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
5795 /* Accept any base register. SP only in SImode or larger. */
5796 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
/* Constant-pool SYMBOL_REFs are PC-relative loads (non-PIC only).  */
5799 /* This is PC relative data before arm_reorg runs. */
5800 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
5801 && GET_CODE (x
) == SYMBOL_REF
5802 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
/* After arm_reorg, pool entries appear as LABEL_REF or (CONST (PLUS
   (LABEL_REF) (CONST_INT))).  */
5805 /* This is PC relative data after arm_reorg runs. */
5806 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
5808 && (GET_CODE (x
) == LABEL_REF
5809 || (GET_CODE (x
) == CONST
5810 && GET_CODE (XEXP (x
, 0)) == PLUS
5811 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5812 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5815 /* Post-inc indexing only supported for SImode and larger. */
5816 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
5817 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
5820 else if (GET_CODE (x
) == PLUS
)
5822 /* REG+REG address can be any two index registers. */
5823 /* We disallow FRAME+REG addressing since we know that FRAME
5824 will be replaced with STACK, and SP relative addressing only
5825 permits SP+OFFSET. */
5826 if (GET_MODE_SIZE (mode
) <= 4
5827 && XEXP (x
, 0) != frame_pointer_rtx
5828 && XEXP (x
, 1) != frame_pointer_rtx
5829 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5830 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
5831 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
5834 /* REG+const has 5-7 bit offset for non-SP registers. */
5835 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5836 || XEXP (x
, 0) == arg_pointer_rtx
)
5837 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5838 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
/* SP+imm: word-aligned offsets up to 1024 bytes, SImode or wider only.  */
5841 /* REG+const has 10-bit offset for SP, but only SImode and
5842 larger is supported. */
5843 /* ??? Should probably check for DI/DFmode overflow here
5844 just like GO_IF_LEGITIMATE_OFFSET does. */
5845 else if (GET_CODE (XEXP (x
, 0)) == REG
5846 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
5847 && GET_MODE_SIZE (mode
) >= 4
5848 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5849 && INTVAL (XEXP (x
, 1)) >= 0
5850 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
5851 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
/* Frame/arg/virtual register + word-aligned constant: these will be
   eliminated into SP-relative addresses later.  */
5854 else if (GET_CODE (XEXP (x
, 0)) == REG
5855 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
5856 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
5857 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
5858 && REGNO (XEXP (x
, 0)) <= LAST_VIRTUAL_REGISTER
))
5859 && GET_MODE_SIZE (mode
) >= 4
5860 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5861 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
/* Word-sized non-float constant-pool references whose pool entry
   mentions a symbol (presumably a PIC-related case — lines between
   5861 and 5865, and after 5871, are missing from this extraction).  */
5865 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5866 && GET_MODE_SIZE (mode
) == 4
5867 && GET_CODE (x
) == SYMBOL_REF
5868 && CONSTANT_POOL_ADDRESS_P (x
)
5870 && symbol_mentioned_p (get_pool_constant (x
))
5871 && ! pcrel_constant_p (get_pool_constant (x
))))
5877 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5878 instruction of mode MODE. */
/* NOTE(review): extraction dropped the return type, braces, `case` labels
   and the default/return lines of the switch (original numbering jumps
   5880→5882→5885…).  Visible ranges: byte loads 0..31, halfword loads
   0..62 even, and a wider word-sized range capped at 128 bytes.  Code
   left byte-identical.  */
5880 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
5882 switch (GET_MODE_SIZE (mode
))
5885 return val
>= 0 && val
< 32;
5888 return val
>= 0 && val
< 64 && (val
& 1) == 0;
5892 && (val
+ GET_MODE_SIZE (mode
)) <= 128
/* Top-level TARGET_LEGITIMATE_ADDRESS_P hook: dispatch to the ARM,
   Thumb-2 or Thumb-1 address predicate depending on the target ISA.
   NOTE(review): the leading `if (TARGET_ARM)` line (original 5899-5900)
   was dropped by extraction; the first `return` is its consequent.  */
5898 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
5901 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
5902 else if (TARGET_THUMB2
)
5903 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
5904 else /* if (TARGET_THUMB1) */
5905 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
5908 /* Build the SYMBOL_REF for __tls_get_addr. */
/* Cached libfunc SYMBOL_REF; GTY(()) registers it as a GC root.  */
5910 static GTY(()) rtx tls_get_addr_libfunc
;
/* Lazily create and return the __tls_get_addr libfunc symbol.  */
5913 get_tls_get_addr (void)
5915 if (!tls_get_addr_libfunc
)
5916 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
5917 return tls_get_addr_libfunc
;
/* Emit code to load the thread pointer into TARGET (a fresh pseudo is
   allocated when TARGET is null).  Uses the hardware TP read when
   available, otherwise the soft helper whose result lands in r0.
   NOTE(review): the TARGET_HARD_TP/else test and the final `return`
   lines are missing from this extraction; code left byte-identical.  */
5921 arm_load_tp (rtx target
)
5924 target
= gen_reg_rtx (SImode
);
5928 /* Can return in any reg. */
5929 emit_insn (gen_load_tp_hard (target
));
5933 /* Always returned in r0. Immediately copy the result into a pseudo,
5934 otherwise other uses of r0 (e.g. setting up function arguments) may
5935 clobber the value. */
5939 emit_insn (gen_load_tp_soft ());
5941 tmp
= gen_rtx_REG (SImode
, 0);
5942 emit_move_insn (target
, tmp
);
/* Wrap X in a CONST and move it into REG (allocating a pseudo when REG
   is null); presumably returns REG — the `return` line was dropped by
   extraction.  */
5948 load_tls_operand (rtx x
, rtx reg
)
5952 if (reg
== NULL_RTX
)
5953 reg
= gen_reg_rtx (SImode
);
5955 tmp
= gen_rtx_CONST (SImode
, x
);
5957 emit_move_insn (reg
, tmp
);
/* Emit the instruction sequence that calls __tls_get_addr for symbol X
   using relocation kind RELOC (TLS_GD32/TLS_LDM32).  The call's result
   rtx is stored through VALUEP; the collected insn sequence is the
   return value (the start_sequence/end_sequence and `return insns;`
   lines are missing from this extraction).  The PIC label arithmetic
   adds 8 for ARM state and 4 for Thumb (PC-read offset).  */
5963 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
5965 rtx insns
, label
, labelno
, sum
;
5969 labelno
= GEN_INT (pic_labelno
++);
5970 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5971 label
= gen_rtx_CONST (VOIDmode
, label
);
5973 sum
= gen_rtx_UNSPEC (Pmode
,
5974 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
5975 GEN_INT (TARGET_ARM
? 8 : 4)),
5977 reg
= load_tls_operand (sum
, reg
);
/* NOTE(review): the `if (TARGET_ARM)` guard before the first emit was
   dropped (numbering jumps 5977→5980).  */
5980 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5981 else if (TARGET_THUMB2
)
5982 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5983 else /* TARGET_THUMB1 */
5984 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5986 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
, LCT_PURE
, /* LCT_CONST? */
5987 Pmode
, 1, reg
, Pmode
);
5989 insns
= get_insns ();
/* Legitimize a TLS symbol reference X, emitting whatever insns the
   symbol's TLS access model requires and returning an address rtx.
   Handles global-dynamic, local-dynamic, initial-exec and local-exec
   models.  NOTE(review): the `switch (model)` header, braces and the
   `break;`/`return`/default lines are missing from this extraction
   (original numbering skips, e.g. 5999→6003); code left byte-identical.  */
5996 legitimize_tls_address (rtx x
, rtx reg
)
5998 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
5999 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
/* GD: one __tls_get_addr call per symbol; REG_EQUAL note is X itself.  */
6003 case TLS_MODEL_GLOBAL_DYNAMIC
:
6004 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
6005 dest
= gen_reg_rtx (Pmode
);
6006 emit_libcall_block (insns
, dest
, ret
, x
);
/* LD: one call per module; per-symbol addend added afterwards.  */
6009 case TLS_MODEL_LOCAL_DYNAMIC
:
6010 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
6012 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6013 share the LDM result with other LD model accesses. */
6014 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
6016 dest
= gen_reg_rtx (Pmode
);
6017 emit_libcall_block (insns
, dest
, ret
, eqv
);
6019 /* Load the addend. */
6020 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
, GEN_INT (TLS_LDO32
)),
6022 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
6023 return gen_rtx_PLUS (Pmode
, dest
, addend
);
/* IE: PC-relative load of the GOT TPOFF entry, then add to TP.  */
6025 case TLS_MODEL_INITIAL_EXEC
:
6026 labelno
= GEN_INT (pic_labelno
++);
6027 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6028 label
= gen_rtx_CONST (VOIDmode
, label
);
6029 sum
= gen_rtx_UNSPEC (Pmode
,
6030 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
6031 GEN_INT (TARGET_ARM
? 8 : 4)),
6033 reg
= load_tls_operand (sum
, reg
);
/* NOTE(review): `if (TARGET_ARM)` guard missing here (6033→6036).  */
6036 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
6037 else if (TARGET_THUMB2
)
6038 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
6041 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6042 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
6045 tp
= arm_load_tp (NULL_RTX
);
6047 return gen_rtx_PLUS (Pmode
, tp
, reg
);
/* LE: TPOFF is a link-time constant; just add it to TP.  */
6049 case TLS_MODEL_LOCAL_EXEC
:
6050 tp
= arm_load_tp (NULL_RTX
);
6052 reg
= gen_rtx_UNSPEC (Pmode
,
6053 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
6055 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
6057 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6064 /* Try machine-dependent ways of modifying an illegitimate address
6065 to be legitimate. If we find one, return the new, valid address. */
/* NOTE(review): extraction dropped the TARGET_THUMB2/THUMB1 dispatch
   lines, several condition lines (e.g. the FP coprocessor cases around
   6101-6113) and the final `return x;` — the embedded numbering jumps.
   Code left byte-identical.  */
6067 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6071 /* TODO: legitimize_address for Thumb2. */
6074 return thumb_legitimize_address (x
, orig_x
, mode
);
6077 if (arm_tls_symbol_p (x
))
6078 return legitimize_tls_address (x
, NULL_RTX
);
/* PLUS: force non-symbolic constants into registers, then split large
   REG+CONST offsets into base+small-offset form.  */
6080 if (GET_CODE (x
) == PLUS
)
6082 rtx xop0
= XEXP (x
, 0);
6083 rtx xop1
= XEXP (x
, 1);
6085 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
6086 xop0
= force_reg (SImode
, xop0
);
6088 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
6089 xop1
= force_reg (SImode
, xop1
);
6091 if (ARM_BASE_REGISTER_RTX_P (xop0
)
6092 && GET_CODE (xop1
) == CONST_INT
)
6094 HOST_WIDE_INT n
, low_n
;
6098 /* VFP addressing modes actually allow greater offsets, but for
6099 now we just stick with the lowest common denominator. */
6101 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
/* Keep the low 12 bits (sign-preserving) in the address; fold the
   rest into a new base register.  */
6113 low_n
= ((mode
) == TImode
? 0
6114 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
6118 base_reg
= gen_reg_rtx (SImode
);
6119 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
6120 emit_move_insn (base_reg
, val
);
6121 x
= plus_constant (base_reg
, low_n
);
6123 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6124 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6127 /* XXX We don't allow MINUS any more -- see comment in
6128 arm_legitimate_address_outer_p (). */
6129 else if (GET_CODE (x
) == MINUS
)
6131 rtx xop0
= XEXP (x
, 0);
6132 rtx xop1
= XEXP (x
, 1);
6134 if (CONSTANT_P (xop0
))
6135 xop0
= force_reg (SImode
, xop0
);
6137 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6138 xop1
= force_reg (SImode
, xop1
);
6140 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6141 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6144 /* Make sure to take full advantage of the pre-indexed addressing mode
6145 with absolute addresses which often allows for the base register to
6146 be factorized for multiple adjacent memory references, and it might
6147 even allows for the mini pool to be avoided entirely. */
6148 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
6151 HOST_WIDE_INT mask
, base
, index
;
6154 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6155 use a 8-bit index. So let's use a 12-bit index for SImode only and
6156 hope that arm_gen_constant will enable ldrb to use more bits. */
6157 bits
= (mode
== SImode
) ? 12 : 8;
6158 mask
= (1 << bits
) - 1;
6159 base
= INTVAL (x
) & ~mask
;
6160 index
= INTVAL (x
) & mask
;
6161 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6163 /* It'll most probably be more efficient to generate the base
6164 with more bits set and use a negative index instead. */
6168 base_reg
= force_reg (SImode
, GEN_INT (base
));
6169 x
= plus_constant (base_reg
, index
);
/* PIC path — presumably guarded by `if (flag_pic)` on a dropped line.  */
6174 /* We need to find and carefully transform any SYMBOL and LABEL
6175 references; so go back to the original address expression. */
6176 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6178 if (new_x
!= orig_x
)
6186 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6187 to be legitimate. If we find one, return the new, valid address. */
/* NOTE(review): interior lines (conditions around 6210-6212, closing
   braces, the PIC guard and the final `return x;`) are missing from
   this extraction; code left byte-identical.  */
6189 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6191 if (arm_tls_symbol_p (x
))
6192 return legitimize_tls_address (x
, NULL_RTX
);
/* REG+CONST whose offset is out of the direct-encoding range.  */
6194 if (GET_CODE (x
) == PLUS
6195 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6196 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6197 || INTVAL (XEXP (x
, 1)) < 0))
6199 rtx xop0
= XEXP (x
, 0);
6200 rtx xop1
= XEXP (x
, 1);
6201 HOST_WIDE_INT offset
= INTVAL (xop1
);
6203 /* Try and fold the offset into a biasing of the base register and
6204 then offsetting that. Don't do this when optimizing for space
6205 since it can cause too many CSEs. */
6206 if (optimize_size
&& offset
>= 0
6207 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6209 HOST_WIDE_INT delta
;
/* NOTE(review): the first branch's condition (original 6210-6211)
   was dropped; this is its consequent.  */
6212 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6213 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6214 delta
= 31 * GET_MODE_SIZE (mode
);
6216 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6218 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
6220 x
= plus_constant (xop0
, delta
);
6222 else if (offset
< 0 && offset
> -256)
6223 /* Small negative offsets are best done with a subtract before the
6224 dereference, forcing these into a register normally takes two
6226 x
= force_operand (x
, NULL_RTX
);
6229 /* For the remaining cases, force the constant into a register. */
6230 xop1
= force_reg (SImode
, xop1
);
6231 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
/* non-REG + REG: legitimize the first operand into a register.  */
6234 else if (GET_CODE (x
) == PLUS
6235 && s_register_operand (XEXP (x
, 1), SImode
)
6236 && !s_register_operand (XEXP (x
, 0), SImode
))
6238 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6240 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
/* PIC path — presumably guarded by `if (flag_pic)` on a dropped line.  */
6245 /* We need to find and carefully transform any SYMBOL and LABEL
6246 references; so go back to the original address expression. */
6247 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6249 if (new_x
!= orig_x
)
/* Reload-time address fixups for Thumb (LEGITIMIZE_RELOAD_ADDRESS):
   push a reload for SP+offset sub-word accesses whose offset is not
   directly encodable, and for hi-reg + hi-reg sums (reloading the whole
   PLUS needs one reload register instead of two).
   NOTE(review): the locals `x`/`orig_x` and the `return true/false`
   lines are on lines dropped by extraction; code left byte-identical.  */
6257 thumb_legitimize_reload_address (rtx
*x_p
,
6258 enum machine_mode mode
,
6259 int opnum
, int type
,
6260 int ind_levels ATTRIBUTE_UNUSED
)
6264 if (GET_CODE (x
) == PLUS
6265 && GET_MODE_SIZE (mode
) < 4
6266 && REG_P (XEXP (x
, 0))
6267 && XEXP (x
, 0) == stack_pointer_rtx
6268 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6269 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6274 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6275 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6279 /* If both registers are hi-regs, then it's better to reload the
6280 entire expression rather than each register individually. That
6281 only requires one reload register rather than two. */
6282 if (GET_CODE (x
) == PLUS
6283 && REG_P (XEXP (x
, 0))
6284 && REG_P (XEXP (x
, 1))
6285 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6286 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6291 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6292 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6299 /* Test for various thread-local symbols. */
6301 /* Return TRUE if X is a thread-local symbol. */
/* NOTE(review): the `return false;` consequents of the two guards were
   dropped by extraction; code left byte-identical.  */
6304 arm_tls_symbol_p (rtx x
)
6306 if (! TARGET_HAVE_TLS
)
6309 if (GET_CODE (x
) != SYMBOL_REF
)
6312 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6315 /* Helper for arm_tls_referenced_p. */
/* for_each_rtx callback: nonzero for a SYMBOL_REF with a TLS model;
   skips the interior of UNSPEC_TLS expressions (the skip-return after
   the UNSPEC test, original ~6327+, is missing from this extraction).  */
6318 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6320 if (GET_CODE (*x
) == SYMBOL_REF
)
6321 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6323 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6324 TLS offsets, not real symbol references. */
6325 if (GET_CODE (*x
) == UNSPEC
6326 && XINT (*x
, 1) == UNSPEC_TLS
)
6332 /* Return TRUE if X contains any TLS symbol references. */
/* Walks X with for_each_rtx/arm_tls_operand_p_1; trivially false when
   the target has no TLS support (that early-return body line was
   dropped by extraction).  */
6335 arm_tls_referenced_p (rtx x
)
6337 if (! TARGET_HAVE_TLS
)
6340 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6343 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
/* Reject forcing X to the constant pool when section-relative offsets
   must stay in-section and X is symbol+offset outside its block, or
   when X references TLS.  NOTE(review): the declarations of base/offset
   and the `return true;` line are missing from this extraction.  */
6346 arm_cannot_force_const_mem (rtx x
)
6350 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6352 split_const (x
, &base
, &offset
);
6353 if (GET_CODE (base
) == SYMBOL_REF
6354 && !offset_within_block_p (base
, INTVAL (offset
)))
6357 return arm_tls_referenced_p (x
);
/* REG_OR_SUBREG_REG(X): true for a REG or a SUBREG of a REG.
   REG_OR_SUBREG_RTX(X): the REG itself, or the SUBREG's inner REG.
   Used by the cost functions below.  */
6360 #define REG_OR_SUBREG_REG(X) \
6361 (GET_CODE (X) == REG \
6362 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6364 #define REG_OR_SUBREG_RTX(X) \
6365 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
/* Speed cost (in COSTS_N_INSNS units) of rtx X for Thumb-1, where CODE
   is GET_CODE (X) and OUTER is the code of the containing expression.
   NOTE(review): the `switch (code)` header, all `case` labels, several
   declarations (e.g. `cycles`, `total`) and many branch lines were
   dropped by this extraction — the embedded numbering jumps (6370→6384,
   6439→6457 …).  Code left byte-identical.  */
6368 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6370 enum machine_mode mode
= GET_MODE (x
);
6384 return COSTS_N_INSNS (1);
6387 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6390 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
6397 return COSTS_N_INSNS (2) + cycles
;
6399 return COSTS_N_INSNS (1) + 16;
/* SET: base one insn plus memory penalties on either side.  */
6402 return (COSTS_N_INSNS (1)
6403 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6404 + GET_CODE (SET_DEST (x
)) == MEM
));
/* CONST_INT: cost depends on whether the value fits mov/shifted mov,
   or needs to be materialized some other way.  */
6409 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6411 if (thumb_shiftable_const (INTVAL (x
)))
6412 return COSTS_N_INSNS (2);
6413 return COSTS_N_INSNS (3);
6415 else if ((outer
== PLUS
|| outer
== COMPARE
)
6416 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6418 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6419 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6420 return COSTS_N_INSNS (1);
6421 else if (outer
== AND
)
6424 /* This duplicates the tests in the andsi3 expander. */
6425 for (i
= 9; i
<= 31; i
++)
6426 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6427 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6428 return COSTS_N_INSNS (2);
6430 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6431 || outer
== LSHIFTRT
)
6433 return COSTS_N_INSNS (2);
6439 return COSTS_N_INSNS (3);
6457 /* XXX another guess. */
6458 /* Memory costs quite a lot for the first word, but subsequent words
6459 load at the equivalent of a single insn each. */
6460 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
6461 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
6466 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
/* Sign/zero extend: recurse into the operand; extra insn for DImode.  */
6472 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
6473 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
6479 return total
+ COSTS_N_INSNS (1);
6481 /* Assume a two-shift sequence. Increase the cost slightly so
6482 we prefer actual shifts over an extend operation. */
6483 return total
+ 1 + COSTS_N_INSNS (2);
/* Core ARM/Thumb-2 rtx cost function (speed variant).  Writes the cost
   of X into *TOTAL and presumably returns a bool telling the caller
   whether the operands still need to be costed (standard rtx_costs
   protocol) — the return statements are on lines this extraction
   dropped.  NOTE(review): the `switch (code)` header, every `case`
   label, closing braces and many guard lines are missing (embedded
   numbering jumps throughout, e.g. 6496→6502, 6755→6762); code text
   left byte-identical.  */
6491 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
6493 enum machine_mode mode
= GET_MODE (x
);
6494 enum rtx_code subcode
;
6496 enum rtx_code code
= GET_CODE (x
);
6502 /* Memory costs quite a lot for the first word, but subsequent words
6503 load at the equivalent of a single insn each. */
6504 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
/* Division: cheap only with hardware FP of a matching width.  */
6511 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
6512 *total
= COSTS_N_INSNS (2);
6513 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
6514 *total
= COSTS_N_INSNS (4);
6516 *total
= COSTS_N_INSNS (20);
/* Rotate by register / non-constant amount.  */
6520 if (GET_CODE (XEXP (x
, 1)) == REG
)
6521 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
6522 else if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6523 *total
= rtx_cost (XEXP (x
, 1), code
, speed
);
6529 *total
+= COSTS_N_INSNS (4);
6534 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
6535 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6538 *total
+= COSTS_N_INSNS (3);
6542 *total
+= COSTS_N_INSNS (1);
6543 /* Increase the cost of complex shifts because they aren't any faster,
6544 and reduce dual issue opportunities. */
6545 if (arm_tune_cortex_a9
6546 && outer
!= SET
&& GET_CODE (XEXP (x
, 1)) != CONST_INT
)
/* MINUS handling begins here (case label dropped by extraction).  */
6554 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
6555 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6556 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6558 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6562 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6563 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6565 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6572 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6574 if (TARGET_HARD_FLOAT
6576 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6578 *total
= COSTS_N_INSNS (1);
6579 if (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
6580 && arm_const_double_rtx (XEXP (x
, 0)))
6582 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6586 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6587 && arm_const_double_rtx (XEXP (x
, 1)))
6589 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6595 *total
= COSTS_N_INSNS (20);
6599 *total
= COSTS_N_INSNS (1);
6600 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6601 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6603 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6607 subcode
= GET_CODE (XEXP (x
, 1));
6608 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6609 || subcode
== LSHIFTRT
6610 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6612 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6613 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6617 /* A shift as a part of RSB costs no more than RSB itself. */
6618 if (GET_CODE (XEXP (x
, 0)) == MULT
6619 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6621 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, speed
);
6622 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6627 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
6629 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6630 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6634 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
6635 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
6637 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6638 if (GET_CODE (XEXP (XEXP (x
, 1), 0)) == REG
6639 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
6640 *total
+= COSTS_N_INSNS (1);
/* PLUS: extend-then-add fuses on ARMv6+ (UXTAB and friends).  */
6648 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
6649 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
6650 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
6652 *total
= COSTS_N_INSNS (1);
6653 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
6655 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6659 /* MLA: All arguments must be registers. We filter out
6660 multiplication by a power of two, so that we fall down into
6662 if (GET_CODE (XEXP (x
, 0)) == MULT
6663 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6665 /* The cost comes from the cost of the multiply. */
6669 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6671 if (TARGET_HARD_FLOAT
6673 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6675 *total
= COSTS_N_INSNS (1);
6676 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6677 && arm_const_double_rtx (XEXP (x
, 1)))
6679 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6686 *total
= COSTS_N_INSNS (20);
6690 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
6691 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
6693 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, speed
);
6694 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
6695 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
6696 *total
+= COSTS_N_INSNS (1);
6702 case AND
: case XOR
: case IOR
:
6704 /* Normally the frame registers will be spilt into reg+const during
6705 reload, so it is a bad idea to combine them with other instructions,
6706 since then they might not be moved outside of loops. As a compromise
6707 we allow integration with ops that have a constant as their second
6709 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
6710 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
6711 && GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6712 *total
= COSTS_N_INSNS (1);
6716 *total
+= COSTS_N_INSNS (2);
6717 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6718 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6720 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6727 *total
+= COSTS_N_INSNS (1);
6728 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6729 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6731 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6734 subcode
= GET_CODE (XEXP (x
, 0));
6735 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6736 || subcode
== LSHIFTRT
6737 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6739 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6740 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6745 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6747 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6748 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6752 if (subcode
== UMIN
|| subcode
== UMAX
6753 || subcode
== SMIN
|| subcode
== SMAX
)
6755 *total
= COSTS_N_INSNS (3);
/* MULT handling (case label dropped by extraction).  */
6762 /* This should have been handled by the CPU specific routines. */
/* TRUNCATE of a widening-multiply high-part pattern (smull/umull).  */
6766 if (arm_arch3m
&& mode
== SImode
6767 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
6768 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6769 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
6770 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
6771 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
6772 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
6774 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, speed
);
6777 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
/* NEG/NOT handling.  */
6781 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6783 if (TARGET_HARD_FLOAT
6785 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6787 *total
= COSTS_N_INSNS (1);
6790 *total
= COSTS_N_INSNS (2);
6796 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
6797 if (mode
== SImode
&& code
== NOT
)
6799 subcode
= GET_CODE (XEXP (x
, 0));
6800 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6801 || subcode
== LSHIFTRT
6802 || subcode
== ROTATE
|| subcode
== ROTATERT
6804 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
6806 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6807 /* Register shifts cost an extra cycle. */
6808 if (GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
6809 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
/* IF_THEN_ELSE: conditional branch vs conditional execution.  */
6818 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6820 *total
= COSTS_N_INSNS (4);
6824 operand
= XEXP (x
, 0);
6826 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
6827 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
6828 && GET_CODE (XEXP (operand
, 0)) == REG
6829 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
6830 *total
+= COSTS_N_INSNS (1);
6831 *total
+= (rtx_cost (XEXP (x
, 1), code
, speed
)
6832 + rtx_cost (XEXP (x
, 2), code
, speed
));
/* Comparison-as-value cases (NE/GE/LT/... against zero).  */
6836 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6838 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6844 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6845 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6847 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6853 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6854 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6856 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6876 /* SCC insns. In the case where the comparison has already been
6877 performed, then they cost 2 instructions. Otherwise they need
6878 an additional comparison before them. */
6879 *total
= COSTS_N_INSNS (2);
6880 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
6887 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
6893 *total
+= COSTS_N_INSNS (1);
6894 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6895 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6897 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6901 subcode
= GET_CODE (XEXP (x
, 0));
6902 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6903 || subcode
== LSHIFTRT
6904 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6906 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6907 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6912 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6914 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6915 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
/* UMIN/UMAX/SMIN/SMAX: compare plus conditional move.  */
6925 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6926 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
6927 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6928 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
/* ABS.  */
6932 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6934 if (TARGET_HARD_FLOAT
6936 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6938 *total
= COSTS_N_INSNS (1);
6941 *total
= COSTS_N_INSNS (20);
6944 *total
= COSTS_N_INSNS (1);
6946 *total
+= COSTS_N_INSNS (3);
/* SIGN_EXTEND / ZERO_EXTEND.  */
6952 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6954 rtx op
= XEXP (x
, 0);
6955 enum machine_mode opmode
= GET_MODE (op
);
6958 *total
+= COSTS_N_INSNS (1);
6960 if (opmode
!= SImode
)
6964 /* If !arm_arch4, we use one of the extendhisi2_mem
6965 or movhi_bytes patterns for HImode. For a QImode
6966 sign extension, we first zero-extend from memory
6967 and then perform a shift sequence. */
6968 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
6969 *total
+= COSTS_N_INSNS (2);
6972 *total
+= COSTS_N_INSNS (1);
6974 /* We don't have the necessary insn, so we need to perform some
6976 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
6977 /* An and with constant 255. */
6978 *total
+= COSTS_N_INSNS (1);
6980 /* A shift sequence. Increase costs slightly to avoid
6981 combining two shifts into an extend operation. */
6982 *total
+= COSTS_N_INSNS (2) + 1;
6988 switch (GET_MODE (XEXP (x
, 0)))
6995 *total
= COSTS_N_INSNS (1);
/* ZERO_EXTRACT / SIGN_EXTRACT (presumably — case labels dropped).  */
7005 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
/* CONST_INT: directly encodable (or its complement), else count the
   insns arm_gen_constant would emit.  */
7009 if (const_ok_for_arm (INTVAL (x
))
7010 || const_ok_for_arm (~INTVAL (x
)))
7011 *total
= COSTS_N_INSNS (1);
7013 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
7014 INTVAL (x
), NULL_RTX
,
7021 *total
= COSTS_N_INSNS (3);
7025 *total
= COSTS_N_INSNS (1);
7029 *total
= COSTS_N_INSNS (1);
7030 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7034 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
7035 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7036 *total
= COSTS_N_INSNS (1);
7038 *total
= COSTS_N_INSNS (4);
7042 *total
= COSTS_N_INSNS (4);
7047 /* Estimates the size cost of thumb1 instructions.
7048 For now most of the code is copied from thumb1_rtx_costs. We need more
7049 fine grain tuning when we have more related test cases. */
/* NOTE(review): as elsewhere in this extraction, the `switch (code)`
   header, `case` labels, some declarations (e.g. `i`) and several
   branch/return lines are missing (embedded numbering jumps,
   7053→7066, 7120→7138 …).  Code left byte-identical.  */
7051 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7053 enum machine_mode mode
= GET_MODE (x
);
7066 return COSTS_N_INSNS (1);
7069 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7071 /* Thumb1 mul instruction can't operate on const. We must Load it
7072 into a register first. */
7073 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
7074 return COSTS_N_INSNS (1) + const_size
;
7076 return COSTS_N_INSNS (1);
7079 return (COSTS_N_INSNS (1)
7080 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
7081 + GET_CODE (SET_DEST (x
)) == MEM
));
/* CONST_INT: size depends on mov range / negatable / shiftable forms.  */
7086 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7087 return COSTS_N_INSNS (1);
7088 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7089 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
7090 return COSTS_N_INSNS (2);
7091 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7092 if (thumb_shiftable_const (INTVAL (x
)))
7093 return COSTS_N_INSNS (2);
7094 return COSTS_N_INSNS (3);
7096 else if ((outer
== PLUS
|| outer
== COMPARE
)
7097 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7099 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7100 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7101 return COSTS_N_INSNS (1);
7102 else if (outer
== AND
)
7105 /* This duplicates the tests in the andsi3 expander. */
7106 for (i
= 9; i
<= 31; i
++)
7107 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7108 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7109 return COSTS_N_INSNS (2);
7111 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7112 || outer
== LSHIFTRT
)
7114 return COSTS_N_INSNS (2);
7120 return COSTS_N_INSNS (3);
7138 /* XXX another guess. */
7139 /* Memory costs quite a lot for the first word, but subsequent words
7140 load at the equivalent of a single insn each. */
7141 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7142 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7147 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
/* Extension costs keyed on the operand's mode.  */
7152 /* XXX still guessing. */
7153 switch (GET_MODE (XEXP (x
, 0)))
7156 return (1 + (mode
== DImode
? 4 : 0)
7157 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7160 return (4 + (mode
== DImode
? 4 : 0)
7161 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7164 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7175 /* RTX costs when optimizing for size. */
7177 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7180 enum machine_mode mode
= GET_MODE (x
);
7183 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7187 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7191 /* A memory access costs 1 insn if the mode is small, or the address is
7192 a single register, otherwise it costs one insn per word. */
7193 if (REG_P (XEXP (x
, 0)))
7194 *total
= COSTS_N_INSNS (1);
7196 && GET_CODE (XEXP (x
, 0)) == PLUS
7197 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
7198 /* This will be split into two instructions.
7199 See arm.md:calculate_pic_address. */
7200 *total
= COSTS_N_INSNS (2);
7202 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7209 /* Needs a libcall, so it costs about this. */
7210 *total
= COSTS_N_INSNS (2);
7214 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
7216 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, false);
7224 if (mode
== DImode
&& GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7226 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, false);
7229 else if (mode
== SImode
)
7231 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, false);
7232 /* Slightly disparage register shifts, but not by much. */
7233 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
7234 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, false);
7238 /* Needs a libcall. */
7239 *total
= COSTS_N_INSNS (2);
7243 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7244 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7246 *total
= COSTS_N_INSNS (1);
7252 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7253 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7255 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7256 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7257 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7258 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7259 || subcode1
== ASHIFTRT
)
7261 /* It's just the cost of the two operands. */
7266 *total
= COSTS_N_INSNS (1);
7270 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7274 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7275 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7277 *total
= COSTS_N_INSNS (1);
7281 /* A shift as a part of ADD costs nothing. */
7282 if (GET_CODE (XEXP (x
, 0)) == MULT
7283 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7285 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7286 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, false);
7287 *total
+= rtx_cost (XEXP (x
, 1), code
, false);
7292 case AND
: case XOR
: case IOR
:
7295 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7297 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7298 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7299 || (code
== AND
&& subcode
== NOT
))
7301 /* It's just the cost of the two operands. */
7307 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7311 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7315 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7316 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7318 *total
= COSTS_N_INSNS (1);
7324 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7333 if (cc_register (XEXP (x
, 0), VOIDmode
))
7336 *total
= COSTS_N_INSNS (1);
7340 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7341 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7342 *total
= COSTS_N_INSNS (1);
7344 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
7349 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
7352 if (const_ok_for_arm (INTVAL (x
)))
7353 /* A multiplication by a constant requires another instruction
7354 to load the constant to a register. */
7355 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
7357 else if (const_ok_for_arm (~INTVAL (x
)))
7358 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
7359 else if (const_ok_for_arm (-INTVAL (x
)))
7361 if (outer_code
== COMPARE
|| outer_code
== PLUS
7362 || outer_code
== MINUS
)
7365 *total
= COSTS_N_INSNS (1);
7368 *total
= COSTS_N_INSNS (2);
7374 *total
= COSTS_N_INSNS (2);
7378 *total
= COSTS_N_INSNS (4);
7383 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7384 cost of these slightly. */
7385 *total
= COSTS_N_INSNS (1) + 1;
7389 if (mode
!= VOIDmode
)
7390 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7392 *total
= COSTS_N_INSNS (4); /* How knows? */
7397 /* RTX costs when optimizing for size. */
7399 arm_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
7403 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
7404 (enum rtx_code
) outer_code
, total
);
7406 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
7407 (enum rtx_code
) outer_code
,
7411 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7412 supported on any "slowmul" cores, so it can be ignored. */
7415 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7416 int *total
, bool speed
)
7418 enum machine_mode mode
= GET_MODE (x
);
7422 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7429 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
7432 *total
= COSTS_N_INSNS (20);
7436 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7438 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7439 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7440 int cost
, const_ok
= const_ok_for_arm (i
);
7441 int j
, booth_unit_size
;
7443 /* Tune as appropriate. */
7444 cost
= const_ok
? 4 : 8;
7445 booth_unit_size
= 2;
7446 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7448 i
>>= booth_unit_size
;
7452 *total
= COSTS_N_INSNS (cost
);
7453 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7457 *total
= COSTS_N_INSNS (20);
7461 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
7466 /* RTX cost for cores with a fast multiply unit (M variants). */
7469 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7470 int *total
, bool speed
)
7472 enum machine_mode mode
= GET_MODE (x
);
7476 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7480 /* ??? should thumb2 use different costs? */
7484 /* There is no point basing this on the tuning, since it is always the
7485 fast variant if it exists at all. */
7487 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7488 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7489 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7491 *total
= COSTS_N_INSNS(2);
7498 *total
= COSTS_N_INSNS (5);
7502 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7504 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7505 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7506 int cost
, const_ok
= const_ok_for_arm (i
);
7507 int j
, booth_unit_size
;
7509 /* Tune as appropriate. */
7510 cost
= const_ok
? 4 : 8;
7511 booth_unit_size
= 8;
7512 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7514 i
>>= booth_unit_size
;
7518 *total
= COSTS_N_INSNS(cost
);
7524 *total
= COSTS_N_INSNS (4);
7528 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7530 if (TARGET_HARD_FLOAT
7532 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7534 *total
= COSTS_N_INSNS (1);
7539 /* Requires a lib call */
7540 *total
= COSTS_N_INSNS (20);
7544 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7549 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7550 so it can be ignored. */
7553 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7554 int *total
, bool speed
)
7556 enum machine_mode mode
= GET_MODE (x
);
7560 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7567 if (GET_CODE (XEXP (x
, 0)) != MULT
)
7568 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7570 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7571 will stall until the multiplication is complete. */
7572 *total
= COSTS_N_INSNS (3);
7576 /* There is no point basing this on the tuning, since it is always the
7577 fast variant if it exists at all. */
7579 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7580 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7581 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7583 *total
= COSTS_N_INSNS (2);
7590 *total
= COSTS_N_INSNS (5);
7594 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7596 /* If operand 1 is a constant we can more accurately
7597 calculate the cost of the multiply. The multiplier can
7598 retire 15 bits on the first cycle and a further 12 on the
7599 second. We do, of course, have to load the constant into
7600 a register first. */
7601 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7602 /* There's a general overhead of one cycle. */
7604 unsigned HOST_WIDE_INT masked_const
;
7609 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
7611 masked_const
= i
& 0xffff8000;
7612 if (masked_const
!= 0)
7615 masked_const
= i
& 0xf8000000;
7616 if (masked_const
!= 0)
7619 *total
= COSTS_N_INSNS (cost
);
7625 *total
= COSTS_N_INSNS (3);
7629 /* Requires a lib call */
7630 *total
= COSTS_N_INSNS (20);
7634 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7639 /* RTX costs for 9e (and later) cores. */
7642 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7643 int *total
, bool speed
)
7645 enum machine_mode mode
= GET_MODE (x
);
7652 *total
= COSTS_N_INSNS (3);
7656 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7664 /* There is no point basing this on the tuning, since it is always the
7665 fast variant if it exists at all. */
7667 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7668 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7669 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7671 *total
= COSTS_N_INSNS (2);
7678 *total
= COSTS_N_INSNS (5);
7684 *total
= COSTS_N_INSNS (2);
7688 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7690 if (TARGET_HARD_FLOAT
7692 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7694 *total
= COSTS_N_INSNS (1);
7699 *total
= COSTS_N_INSNS (20);
7703 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7706 /* All address computations that can be done are free, but rtx cost returns
7707 the same for practically all of them. So we weight the different types
7708 of address here in the order (most pref first):
7709 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
7711 arm_arm_address_cost (rtx x
)
7713 enum rtx_code c
= GET_CODE (x
);
7715 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
7717 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
7722 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7725 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
7735 arm_thumb_address_cost (rtx x
)
7737 enum rtx_code c
= GET_CODE (x
);
7742 && GET_CODE (XEXP (x
, 0)) == REG
7743 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7750 arm_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
7752 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
7755 /* Adjust cost hook for XScale. */
7757 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7759 /* Some true dependencies can have a higher cost depending
7760 on precisely how certain input operands are used. */
7761 if (REG_NOTE_KIND(link
) == 0
7762 && recog_memoized (insn
) >= 0
7763 && recog_memoized (dep
) >= 0)
7765 int shift_opnum
= get_attr_shift (insn
);
7766 enum attr_type attr_type
= get_attr_type (dep
);
7768 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7769 operand for INSN. If we have a shifted input operand and the
7770 instruction we depend on is another ALU instruction, then we may
7771 have to account for an additional stall. */
7772 if (shift_opnum
!= 0
7773 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
7775 rtx shifted_operand
;
7778 /* Get the shifted operand. */
7779 extract_insn (insn
);
7780 shifted_operand
= recog_data
.operand
[shift_opnum
];
7782 /* Iterate over all the operands in DEP. If we write an operand
7783 that overlaps with SHIFTED_OPERAND, then we have increase the
7784 cost of this dependency. */
7786 preprocess_constraints ();
7787 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
7789 /* We can ignore strict inputs. */
7790 if (recog_data
.operand_type
[opno
] == OP_IN
)
7793 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
7805 /* Adjust cost hook for Cortex A9. */
7807 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
7809 switch (REG_NOTE_KIND (link
))
7816 case REG_DEP_OUTPUT
:
7817 if (recog_memoized (insn
) >= 0
7818 && recog_memoized (dep
) >= 0)
7820 if (GET_CODE (PATTERN (insn
)) == SET
)
7823 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
7825 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
7827 enum attr_type attr_type_insn
= get_attr_type (insn
);
7828 enum attr_type attr_type_dep
= get_attr_type (dep
);
7830 /* By default all dependencies of the form
7833 have an extra latency of 1 cycle because
7834 of the input and output dependency in this
7835 case. However this gets modeled as an true
7836 dependency and hence all these checks. */
7837 if (REG_P (SET_DEST (PATTERN (insn
)))
7838 && REG_P (SET_DEST (PATTERN (dep
)))
7839 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
7840 SET_DEST (PATTERN (dep
))))
7842 /* FMACS is a special case where the dependant
7843 instruction can be issued 3 cycles before
7844 the normal latency in case of an output
7846 if ((attr_type_insn
== TYPE_FMACS
7847 || attr_type_insn
== TYPE_FMACD
)
7848 && (attr_type_dep
== TYPE_FMACS
7849 || attr_type_dep
== TYPE_FMACD
))
7851 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7852 *cost
= insn_default_latency (dep
) - 3;
7854 *cost
= insn_default_latency (dep
);
7859 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7860 *cost
= insn_default_latency (dep
) + 1;
7862 *cost
= insn_default_latency (dep
);
7878 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
7879 It corrects the value of COST based on the relationship between
7880 INSN and DEP through the dependence LINK. It returns the new
7881 value. There is a per-core adjust_cost hook to adjust scheduler costs
7882 and the per-core hook can choose to completely override the generic
7883 adjust_cost function. Only put bits of code into arm_adjust_cost that
7884 are common across all cores. */
7886 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
7890 /* When generating Thumb-1 code, we want to place flag-setting operations
7891 close to a conditional branch which depends on them, so that we can
7892 omit the comparison. */
7894 && REG_NOTE_KIND (link
) == 0
7895 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
7896 && recog_memoized (dep
) >= 0
7897 && get_attr_conds (dep
) == CONDS_SET
)
7900 if (current_tune
->sched_adjust_cost
!= NULL
)
7902 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
7906 /* XXX This is not strictly true for the FPA. */
7907 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
7908 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7911 /* Call insns don't incur a stall, even if they follow a load. */
7912 if (REG_NOTE_KIND (link
) == 0
7913 && GET_CODE (insn
) == CALL_INSN
)
7916 if ((i_pat
= single_set (insn
)) != NULL
7917 && GET_CODE (SET_SRC (i_pat
)) == MEM
7918 && (d_pat
= single_set (dep
)) != NULL
7919 && GET_CODE (SET_DEST (d_pat
)) == MEM
)
7921 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
7922 /* This is a load after a store, there is no conflict if the load reads
7923 from a cached area. Assume that loads from the stack, and from the
7924 constant pool are cached, and that others will miss. This is a
7927 if ((GET_CODE (src_mem
) == SYMBOL_REF
7928 && CONSTANT_POOL_ADDRESS_P (src_mem
))
7929 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
7930 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
7931 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
7938 static int fp_consts_inited
= 0;
7940 /* Only zero is valid for VFP. Other values are also valid for FPA. */
7941 static const char * const strings_fp
[8] =
7944 "4", "5", "0.5", "10"
7947 static REAL_VALUE_TYPE values_fp
[8];
7950 init_fp_table (void)
7956 fp_consts_inited
= 1;
7958 fp_consts_inited
= 8;
7960 for (i
= 0; i
< fp_consts_inited
; i
++)
7962 r
= REAL_VALUE_ATOF (strings_fp
[i
], DFmode
);
7967 /* Return TRUE if rtx X is a valid immediate FP constant. */
7969 arm_const_double_rtx (rtx x
)
7974 if (!fp_consts_inited
)
7977 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7978 if (REAL_VALUE_MINUS_ZERO (r
))
7981 for (i
= 0; i
< fp_consts_inited
; i
++)
7982 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
7988 /* Return TRUE if rtx X is a valid immediate FPA constant. */
7990 neg_const_double_rtx_ok_for_fpa (rtx x
)
7995 if (!fp_consts_inited
)
7998 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7999 r
= real_value_negate (&r
);
8000 if (REAL_VALUE_MINUS_ZERO (r
))
8003 for (i
= 0; i
< 8; i
++)
8004 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
8011 /* VFPv3 has a fairly wide range of representable immediates, formed from
8012 "quarter-precision" floating-point values. These can be evaluated using this
8013 formula (with ^ for exponentiation):
8017 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8018 16 <= n <= 31 and 0 <= r <= 7.
8020 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8022 - A (most-significant) is the sign bit.
8023 - BCD are the exponent (encoded as r XOR 3).
8024 - EFGH are the mantissa (encoded as n - 16).
8027 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8028 fconst[sd] instruction, or -1 if X isn't suitable. */
8030 vfp3_const_double_index (rtx x
)
8032 REAL_VALUE_TYPE r
, m
;
8034 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
8035 unsigned HOST_WIDE_INT mask
;
8036 HOST_WIDE_INT m1
, m2
;
8037 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8039 if (!TARGET_VFP3
|| GET_CODE (x
) != CONST_DOUBLE
)
8042 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8044 /* We can't represent these things, so detect them first. */
8045 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
8048 /* Extract sign, exponent and mantissa. */
8049 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
8050 r
= real_value_abs (&r
);
8051 exponent
= REAL_EXP (&r
);
8052 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8053 highest (sign) bit, with a fixed binary point at bit point_pos.
8054 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8055 bits for the mantissa, this may fail (low bits would be lost). */
8056 real_ldexp (&m
, &r
, point_pos
- exponent
);
8057 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8061 /* If there are bits set in the low part of the mantissa, we can't
8062 represent this value. */
8066 /* Now make it so that mantissa contains the most-significant bits, and move
8067 the point_pos to indicate that the least-significant bits have been
8069 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8072 /* We can permit four significant bits of mantissa only, plus a high bit
8073 which is always 1. */
8074 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8075 if ((mantissa
& mask
) != 0)
8078 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8079 mantissa
>>= point_pos
- 5;
8081 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8082 floating-point immediate zero with Neon using an integer-zero load, but
8083 that case is handled elsewhere.) */
8087 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
8089 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8090 normalized significands are in the range [1, 2). (Our mantissa is shifted
8091 left 4 places at this point relative to normalized IEEE754 values). GCC
8092 internally uses [0.5, 1) (see real.c), so the exponent returned from
8093 REAL_EXP must be altered. */
8094 exponent
= 5 - exponent
;
8096 if (exponent
< 0 || exponent
> 7)
8099 /* Sign, mantissa and exponent are now in the correct form to plug into the
8100 formula described in the comment above. */
8101 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
8104 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8106 vfp3_const_double_rtx (rtx x
)
8111 return vfp3_const_double_index (x
) != -1;
8114 /* Recognize immediates which can be used in various Neon instructions. Legal
8115 immediates are described by the following table (for VMVN variants, the
8116 bitwise inverse of the constant shown is recognized. In either case, VMOV
8117 is output and the correct instruction to use for a given constant is chosen
8118 by the assembler). The constant shown is replicated across all elements of
8119 the destination vector.
8121 insn elems variant constant (binary)
8122 ---- ----- ------- -----------------
8123 vmov i32 0 00000000 00000000 00000000 abcdefgh
8124 vmov i32 1 00000000 00000000 abcdefgh 00000000
8125 vmov i32 2 00000000 abcdefgh 00000000 00000000
8126 vmov i32 3 abcdefgh 00000000 00000000 00000000
8127 vmov i16 4 00000000 abcdefgh
8128 vmov i16 5 abcdefgh 00000000
8129 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8130 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8131 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8132 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8133 vmvn i16 10 00000000 abcdefgh
8134 vmvn i16 11 abcdefgh 00000000
8135 vmov i32 12 00000000 00000000 abcdefgh 11111111
8136 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8137 vmov i32 14 00000000 abcdefgh 11111111 11111111
8138 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8140 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8141 eeeeeeee ffffffff gggggggg hhhhhhhh
8142 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8144 For case 18, B = !b. Representable values are exactly those accepted by
8145 vfp3_const_double_index, but are output as floating-point numbers rather
8148 Variants 0-5 (inclusive) may also be used as immediates for the second
8149 operand of VORR/VBIC instructions.
8151 The INVERSE argument causes the bitwise inverse of the given operand to be
8152 recognized instead (used for recognizing legal immediates for the VAND/VORN
8153 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8154 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8155 output, rather than the real insns vbic/vorr).
8157 INVERSE makes no difference to the recognition of float vectors.
8159 The return value is the variant of immediate as shown in the above table, or
8160 -1 if the given value doesn't match any of the listed patterns.
8163 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
8164 rtx
*modconst
, int *elementwidth
)
8166 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8168 for (i = 0; i < idx; i += (STRIDE)) \
8173 immtype = (CLASS); \
8174 elsize = (ELSIZE); \
8178 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8179 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8180 unsigned char bytes
[16];
8181 int immtype
= -1, matches
;
8182 unsigned int invmask
= inverse
? 0xff : 0;
8184 /* Vectors of float constants. */
8185 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8187 rtx el0
= CONST_VECTOR_ELT (op
, 0);
8190 if (!vfp3_const_double_rtx (el0
))
8193 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
8195 for (i
= 1; i
< n_elts
; i
++)
8197 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8200 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8202 if (!REAL_VALUES_EQUAL (r0
, re
))
8207 *modconst
= CONST_VECTOR_ELT (op
, 0);
8215 /* Splat vector constant out into a byte vector. */
8216 for (i
= 0; i
< n_elts
; i
++)
8218 rtx el
= CONST_VECTOR_ELT (op
, i
);
8219 unsigned HOST_WIDE_INT elpart
;
8220 unsigned int part
, parts
;
8222 if (GET_CODE (el
) == CONST_INT
)
8224 elpart
= INTVAL (el
);
8227 else if (GET_CODE (el
) == CONST_DOUBLE
)
8229 elpart
= CONST_DOUBLE_LOW (el
);
8235 for (part
= 0; part
< parts
; part
++)
8238 for (byte
= 0; byte
< innersize
; byte
++)
8240 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8241 elpart
>>= BITS_PER_UNIT
;
8243 if (GET_CODE (el
) == CONST_DOUBLE
)
8244 elpart
= CONST_DOUBLE_HIGH (el
);
8249 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8253 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8254 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8256 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8257 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8259 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8260 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8262 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8263 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
8265 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
8267 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
8269 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8270 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8272 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8273 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8275 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8276 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8278 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8279 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
8281 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
8283 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
8285 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8286 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8288 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8289 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8291 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8292 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8294 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8295 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8297 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
8299 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8300 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
8308 *elementwidth
= elsize
;
8312 unsigned HOST_WIDE_INT imm
= 0;
8314 /* Un-invert bytes of recognized vector, if necessary. */
8316 for (i
= 0; i
< idx
; i
++)
8317 bytes
[i
] ^= invmask
;
8321 /* FIXME: Broken on 32-bit H_W_I hosts. */
8322 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8324 for (i
= 0; i
< 8; i
++)
8325 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8326 << (i
* BITS_PER_UNIT
);
8328 *modconst
= GEN_INT (imm
);
8332 unsigned HOST_WIDE_INT imm
= 0;
8334 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8335 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8337 *modconst
= GEN_INT (imm
);
8345 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8346 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8347 float elements), and a modified constant (whatever should be output for a
8348 VMOV) in *MODCONST. */
8351 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
8352 rtx
*modconst
, int *elementwidth
)
8356 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
8362 *modconst
= tmpconst
;
8365 *elementwidth
= tmpwidth
;
8370 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8371 the immediate is valid, write a constant suitable for using as an operand
8372 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8373 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8376 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
8377 rtx
*modconst
, int *elementwidth
)
8381 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
8383 if (retval
< 0 || retval
> 5)
8387 *modconst
= tmpconst
;
8390 *elementwidth
= tmpwidth
;
8395 /* Return a string suitable for output of Neon immediate logic operation
8399 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
8400 int inverse
, int quad
)
8402 int width
, is_valid
;
8403 static char templ
[40];
8405 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
8407 gcc_assert (is_valid
!= 0);
8410 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
8412 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
8417 /* Output a sequence of pairwise operations to implement a reduction.
8418 NOTE: We do "too much work" here, because pairwise operations work on two
8419 registers-worth of operands in one go. Unfortunately we can't exploit those
8420 extra calculations to do the full operation in fewer steps, I don't think.
8421 Although all vector elements of the result but the first are ignored, we
8422 actually calculate the same result in each of the elements. An alternative
8423 such as initially loading a vector with zero to use as each of the second
8424 operands would use up an additional register and take an extra instruction,
8425 for no particular gain. */
8428 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
8429 rtx (*reduc
) (rtx
, rtx
, rtx
))
8431 enum machine_mode inner
= GET_MODE_INNER (mode
);
8432 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
8435 for (i
= parts
/ 2; i
>= 1; i
/= 2)
8437 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
8438 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
8443 /* If VALS is a vector constant that can be loaded into a register
8444 using VDUP, generate instructions to do so and return an RTX to
8445 assign to the register. Otherwise return NULL_RTX. */
8448 neon_vdup_constant (rtx vals
)
8450 enum machine_mode mode
= GET_MODE (vals
);
8451 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8452 int n_elts
= GET_MODE_NUNITS (mode
);
8453 bool all_same
= true;
8457 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
8460 for (i
= 0; i
< n_elts
; ++i
)
8462 x
= XVECEXP (vals
, 0, i
);
8463 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8468 /* The elements are not all the same. We could handle repeating
8469 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8470 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8474 /* We can load this constant by using VDUP and a constant in a
8475 single ARM register. This will be cheaper than a vector
8478 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8479 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8482 /* Generate code to load VALS, which is a PARALLEL containing only
8483 constants (for vec_init) or CONST_VECTOR, efficiently into a
8484 register. Returns an RTX to copy into the register, or NULL_RTX
8485 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8488 neon_make_constant (rtx vals
)
8490 enum machine_mode mode
= GET_MODE (vals
);
8492 rtx const_vec
= NULL_RTX
;
8493 int n_elts
= GET_MODE_NUNITS (mode
);
8497 if (GET_CODE (vals
) == CONST_VECTOR
)
8499 else if (GET_CODE (vals
) == PARALLEL
)
8501 /* A CONST_VECTOR must contain only CONST_INTs and
8502 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8503 Only store valid constants in a CONST_VECTOR. */
8504 for (i
= 0; i
< n_elts
; ++i
)
8506 rtx x
= XVECEXP (vals
, 0, i
);
8507 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8510 if (n_const
== n_elts
)
8511 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8516 if (const_vec
!= NULL
8517 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
8518 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8520 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
8521 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
8522 pipeline cycle; creating the constant takes one or two ARM
8525 else if (const_vec
!= NULL_RTX
)
8526 /* Load from constant pool. On Cortex-A8 this takes two cycles
8527 (for either double or quad vectors). We can not take advantage
8528 of single-cycle VLD1 because we need a PC-relative addressing
8532 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8533 We can not construct an initializer. */
8537 /* Initialize vector TARGET to VALS. */
8540 neon_expand_vector_init (rtx target
, rtx vals
)
8542 enum machine_mode mode
= GET_MODE (target
);
8543 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8544 int n_elts
= GET_MODE_NUNITS (mode
);
8545 int n_var
= 0, one_var
= -1;
8546 bool all_same
= true;
8550 for (i
= 0; i
< n_elts
; ++i
)
8552 x
= XVECEXP (vals
, 0, i
);
8553 if (!CONSTANT_P (x
))
8554 ++n_var
, one_var
= i
;
8556 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8562 rtx constant
= neon_make_constant (vals
);
8563 if (constant
!= NULL_RTX
)
8565 emit_move_insn (target
, constant
);
8570 /* Splat a single non-constant element if we can. */
8571 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
8573 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8574 emit_insn (gen_rtx_SET (VOIDmode
, target
,
8575 gen_rtx_VEC_DUPLICATE (mode
, x
)));
8579 /* One field is non-constant. Load constant then overwrite varying
8580 field. This is more efficient than using the stack. */
8583 rtx copy
= copy_rtx (vals
);
8584 rtx index
= GEN_INT (one_var
);
8586 /* Load constant part of vector, substitute neighboring value for
8588 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
8589 neon_expand_vector_init (target
, copy
);
8591 /* Insert variable. */
8592 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8596 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
8599 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
8602 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
8605 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
8608 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
8611 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
8614 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
8617 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
8620 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
8628 /* Construct the vector in memory one field at a time
8629 and load the whole vector. */
8630 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), 0);
8631 for (i
= 0; i
< n_elts
; i
++)
8632 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8633 i
* GET_MODE_SIZE (inner_mode
)),
8634 XVECEXP (vals
, 0, i
));
8635 emit_move_insn (target
, mem
);
8638 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8639 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8640 reported source locations are bogus. */
8643 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8648 gcc_assert (GET_CODE (operand
) == CONST_INT
);
8650 lane
= INTVAL (operand
);
8652 if (lane
< low
|| lane
>= high
)
8656 /* Bounds-check lanes. */
8659 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8661 bounds_check (operand
, low
, high
, "lane out of range");
8664 /* Bounds-check constants. */
8667 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8669 bounds_check (operand
, low
, high
, "constant out of range");
/* Return the element width of MODE in bits: the whole mode size for a scalar
   mode (first return), the inner-mode size for a vector mode (second return).
   NOTE(review): garbled extraction -- the function's return-type line, braces
   and the if-condition selecting between the two returns are missing here;
   code text kept verbatim.  Restore from upstream before compiling.  */
8673 neon_element_bits (enum machine_mode mode
)
8676 return GET_MODE_BITSIZE (mode
);
8678 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
/* NOTE(review): garbled extraction -- statements are split across lines, the
   leading "NNNN" tokens are fused source line numbers, and several original
   lines (braces, returns, the binding of `ind') are missing.  Code text kept
   verbatim.  Checks: not an eliminable/virtual base register, then accepts
   (mem (reg)) or (mem (plus reg const_int)).  */
8682 /* Predicates for `match_operand' and `match_operator'. */
8684 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8686 cirrus_memory_offset (rtx op
)
8688 /* Reject eliminable registers. */
8689 if (! (reload_in_progress
|| reload_completed
)
8690 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8691 || reg_mentioned_p (arg_pointer_rtx
, op
)
8692 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8693 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8694 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8695 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8698 if (GET_CODE (op
) == MEM
)
8704 /* Match: (mem (reg)). */
8705 if (GET_CODE (ind
) == REG
)
8711 if (GET_CODE (ind
) == PLUS
8712 && GET_CODE (XEXP (ind
, 0)) == REG
8713 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8714 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
)
/* NOTE(review): garbled extraction -- fragments split across lines, fused
   line numbers, and missing braces/returns/declarations.  Code text kept
   verbatim.  Validates coprocessor addresses: rejects eliminable bases,
   accepts label-relative constants after reload, (mem (reg)),
   autoincrement forms (PRE_INC/POST_DEC gated on WB), PRE/POST_MODIFY,
   and reg+const with the offset in (-1024, 1024) and 4-aligned.  */
8721 /* Return TRUE if OP is a valid coprocessor memory address pattern.
8722 WB is true if full writeback address modes are allowed and is false
8723 if limited writeback address modes (POST_INC and PRE_DEC) are
8727 arm_coproc_mem_operand (rtx op
, bool wb
)
8731 /* Reject eliminable registers. */
8732 if (! (reload_in_progress
|| reload_completed
)
8733 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8734 || reg_mentioned_p (arg_pointer_rtx
, op
)
8735 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8736 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8737 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8738 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8741 /* Constants are converted into offsets from labels. */
8742 if (GET_CODE (op
) != MEM
)
8747 if (reload_completed
8748 && (GET_CODE (ind
) == LABEL_REF
8749 || (GET_CODE (ind
) == CONST
8750 && GET_CODE (XEXP (ind
, 0)) == PLUS
8751 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8752 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8755 /* Match: (mem (reg)). */
8756 if (GET_CODE (ind
) == REG
)
8757 return arm_address_register_rtx_p (ind
, 0);
8759 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
8760 acceptable in any case (subject to verification by
8761 arm_address_register_rtx_p). We need WB to be true to accept
8762 PRE_INC and POST_DEC. */
8763 if (GET_CODE (ind
) == POST_INC
8764 || GET_CODE (ind
) == PRE_DEC
8766 && (GET_CODE (ind
) == PRE_INC
8767 || GET_CODE (ind
) == POST_DEC
)))
8768 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8771 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
8772 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
8773 && GET_CODE (XEXP (ind
, 1)) == PLUS
8774 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
8775 ind
= XEXP (ind
, 1);
8780 if (GET_CODE (ind
) == PLUS
8781 && GET_CODE (XEXP (ind
, 0)) == REG
8782 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8783 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8784 && INTVAL (XEXP (ind
, 1)) > -1024
8785 && INTVAL (XEXP (ind
, 1)) < 1024
8786 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
/* NOTE(review): garbled extraction -- fragments split across lines, fused
   line numbers, missing braces/returns.  Code text kept verbatim.
   Like arm_coproc_mem_operand but for NEON vectors; TYPE selects which
   addressing forms are legal (post-inc disallowed for type 1 / core-register
   loads; reg+const offset range here is (-1024, 1016), 4-aligned).  */
8792 /* Return TRUE if OP is a memory operand which we can load or store a vector
8793 to/from. TYPE is one of the following values:
8794 0 - Vector load/stor (vldr)
8795 1 - Core registers (ldm)
8796 2 - Element/structure loads (vld1)
8799 neon_vector_mem_operand (rtx op
, int type
)
8803 /* Reject eliminable registers. */
8804 if (! (reload_in_progress
|| reload_completed
)
8805 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8806 || reg_mentioned_p (arg_pointer_rtx
, op
)
8807 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8808 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8809 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8810 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8813 /* Constants are converted into offsets from labels. */
8814 if (GET_CODE (op
) != MEM
)
8819 if (reload_completed
8820 && (GET_CODE (ind
) == LABEL_REF
8821 || (GET_CODE (ind
) == CONST
8822 && GET_CODE (XEXP (ind
, 0)) == PLUS
8823 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8824 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8827 /* Match: (mem (reg)). */
8828 if (GET_CODE (ind
) == REG
)
8829 return arm_address_register_rtx_p (ind
, 0);
8831 /* Allow post-increment with Neon registers. */
8832 if (type
!= 1 && (GET_CODE (ind
) == POST_INC
|| GET_CODE (ind
) == PRE_DEC
))
8833 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8835 /* FIXME: vld1 allows register post-modify. */
8841 && GET_CODE (ind
) == PLUS
8842 && GET_CODE (XEXP (ind
, 0)) == REG
8843 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8844 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8845 && INTVAL (XEXP (ind
, 1)) > -1024
8846 && INTVAL (XEXP (ind
, 1)) < 1016
8847 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
/* NOTE(review): garbled extraction; code text kept verbatim.  Same shape as
   the predicates above, but only the plain (mem (reg)) form is accepted for
   NEON structure loads/stores in the visible text.  */
8853 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
8856 neon_struct_mem_operand (rtx op
)
8860 /* Reject eliminable registers. */
8861 if (! (reload_in_progress
|| reload_completed
)
8862 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8863 || reg_mentioned_p (arg_pointer_rtx
, op
)
8864 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8865 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8866 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8867 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8870 /* Constants are converted into offsets from labels. */
8871 if (GET_CODE (op
) != MEM
)
8876 if (reload_completed
8877 && (GET_CODE (ind
) == LABEL_REF
8878 || (GET_CODE (ind
) == CONST
8879 && GET_CODE (XEXP (ind
, 0)) == PLUS
8880 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8881 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8884 /* Match: (mem (reg)). */
8885 if (GET_CODE (ind
) == REG
)
8886 return arm_address_register_rtx_p (ind
, 0);
8891 /* Return true if X is a register that will be eliminated later on. */
8893 arm_eliminable_register (rtx x
)
8895 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
8896 || REGNO (x
) == ARG_POINTER_REGNUM
8897 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
8898 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
/* NOTE(review): garbled extraction -- the function's return-type line, the
   mode tests guarding each branch, and several returns are missing; code
   text kept verbatim.  Decides whether reloading X to/from coprocessor
   registers needs a GENERAL_REGS scratch (NO_REGS otherwise, per the
   visible header comment).  */
8901 /* Return GENERAL_REGS if a scratch register required to reload x to/from
8902 coprocessor registers. Otherwise return NO_REGS. */
8905 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
8909 if (!TARGET_NEON_FP16
)
8910 return GENERAL_REGS
;
8911 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
8913 return GENERAL_REGS
;
8917 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
8918 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8919 && neon_vector_mem_operand (x
, 0))
8922 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
8925 return GENERAL_REGS
;
8928 /* Values which must be returned in the most-significant end of the return
8932 arm_return_in_msb (const_tree valtype
)
8934 return (TARGET_AAPCS_BASED
8936 && (AGGREGATE_TYPE_P (valtype
)
8937 || TREE_CODE (valtype
) == COMPLEX_TYPE
));
/* NOTE(review): garbled extraction; code text kept verbatim (including the
   stray double semicolon after the `rhs' declaration -- harmless empty
   statement, but worth cleaning up upstream).  Missing: return-type line,
   braces, and the early `return false' lines.  */
8940 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
8941 Use by the Cirrus Maverick code which has to workaround
8942 a hardware bug triggered by such instructions. */
8944 arm_memory_load_p (rtx insn
)
8946 rtx body
, lhs
, rhs
;;
8948 if (insn
== NULL_RTX
|| GET_CODE (insn
) != INSN
)
8951 body
= PATTERN (insn
);
8953 if (GET_CODE (body
) != SET
)
8956 lhs
= XEXP (body
, 0);
8957 rhs
= XEXP (body
, 1);
8959 lhs
= REG_OR_SUBREG_RTX (lhs
);
8961 /* If the destination is not a general purpose
8962 register we do not have to worry. */
8963 if (GET_CODE (lhs
) != REG
8964 || REGNO_REG_CLASS (REGNO (lhs
)) != GENERAL_REGS
)
8967 /* As well as loads from memory we also have to react
8968 to loads of invalid constants which will be turned
8969 into loads from the minipool. */
8970 return (GET_CODE (rhs
) == MEM
8971 || GET_CODE (rhs
) == SYMBOL_REF
8972 || note_invalid_constants (insn
, -1, false));
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing: the
   first operand of the guard condition (presumably a null/insn check --
   confirm upstream), braces, and the early return.  */
8975 /* Return TRUE if INSN is a Cirrus instruction. */
8977 arm_cirrus_insn_p (rtx insn
)
8979 enum attr_cirrus attr
;
8981 /* get_attr cannot accept USE or CLOBBER. */
8983 || GET_CODE (insn
) != INSN
8984 || GET_CODE (PATTERN (insn
)) == USE
8985 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
8988 attr
= get_attr_cirrus (insn
);
8990 return attr
!= CIRRUS_NOT
;
/* NOTE(review): garbled extraction -- many control-flow lines (braces,
   returns, an `else' arm, loop structure) are missing; code text kept
   verbatim.  Machine-reorg pass inserting NOPs after Cirrus instruction
   pairs that trigger a hardware bug (branches, double-width ops, ldr/cfmv
   pairs sharing Rd, and compares, per the visible comments).  */
8993 /* Cirrus reorg for invalid instruction combinations. */
8995 cirrus_reorg (rtx first
)
8997 enum attr_cirrus attr
;
8998 rtx body
= PATTERN (first
);
9002 /* Any branch must be followed by 2 non Cirrus instructions. */
9003 if (GET_CODE (first
) == JUMP_INSN
&& GET_CODE (body
) != RETURN
)
9006 t
= next_nonnote_insn (first
);
9008 if (arm_cirrus_insn_p (t
))
9011 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9015 emit_insn_after (gen_nop (), first
);
9020 /* (float (blah)) is in parallel with a clobber. */
9021 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
9022 body
= XVECEXP (body
, 0, 0);
9024 if (GET_CODE (body
) == SET
)
9026 rtx lhs
= XEXP (body
, 0), rhs
= XEXP (body
, 1);
9028 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9029 be followed by a non Cirrus insn. */
9030 if (get_attr_cirrus (first
) == CIRRUS_DOUBLE
)
9032 if (arm_cirrus_insn_p (next_nonnote_insn (first
)))
9033 emit_insn_after (gen_nop (), first
);
9037 else if (arm_memory_load_p (first
))
9039 unsigned int arm_regno
;
9041 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9042 ldr/cfmv64hr combination where the Rd field is the same
9043 in both instructions must be split with a non Cirrus
9050 /* Get Arm register number for ldr insn. */
9051 if (GET_CODE (lhs
) == REG
)
9052 arm_regno
= REGNO (lhs
);
9055 gcc_assert (GET_CODE (rhs
) == REG
);
9056 arm_regno
= REGNO (rhs
);
9060 first
= next_nonnote_insn (first
);
9062 if (! arm_cirrus_insn_p (first
))
9065 body
= PATTERN (first
);
9067 /* (float (blah)) is in parallel with a clobber. */
9068 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0))
9069 body
= XVECEXP (body
, 0, 0);
9071 if (GET_CODE (body
) == FLOAT
)
9072 body
= XEXP (body
, 0);
9074 if (get_attr_cirrus (first
) == CIRRUS_MOVE
9075 && GET_CODE (XEXP (body
, 1)) == REG
9076 && arm_regno
== REGNO (XEXP (body
, 1)))
9077 emit_insn_after (gen_nop (), first
);
9083 /* get_attr cannot accept USE or CLOBBER. */
9085 || GET_CODE (first
) != INSN
9086 || GET_CODE (PATTERN (first
)) == USE
9087 || GET_CODE (PATTERN (first
)) == CLOBBER
)
9090 attr
= get_attr_cirrus (first
);
9092 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9093 must be followed by a non-coprocessor instruction. */
9094 if (attr
== CIRRUS_COMPARE
)
9098 t
= next_nonnote_insn (first
);
9100 if (arm_cirrus_insn_p (t
))
9103 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9107 emit_insn_after (gen_nop (), first
);
/* NOTE(review): garbled extraction; code text kept verbatim.  Recursive walk
   over X's rtx format string: vectors ('E') recurse element-wise, expressions
   ('e') recurse directly.  Missing: declarations of `fmt'/`i'/`j', braces,
   and the final returns.  */
9113 /* Return TRUE if X references a SYMBOL_REF. */
9115 symbol_mentioned_p (rtx x
)
9120 if (GET_CODE (x
) == SYMBOL_REF
)
9123 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9124 are constant offsets, not symbols. */
9125 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9128 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9130 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9136 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9137 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
9140 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
/* NOTE(review): garbled extraction; code text kept verbatim.  Mirror of
   symbol_mentioned_p above, but for LABEL_REF; same missing pieces
   (declarations, braces, final returns).  */
9147 /* Return TRUE if X references a LABEL_REF. */
9149 label_mentioned_p (rtx x
)
9154 if (GET_CODE (x
) == LABEL_REF
)
9157 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9158 instruction, but they are constant offsets, not symbols. */
9159 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
9162 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9163 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9169 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9170 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
9173 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
/* NOTE(review): garbled extraction; code text kept verbatim.  A switch over
   GET_CODE (x) whose case labels and returns are missing -- the CONST case
   presumably recurses and the UNSPEC case tests UNSPEC_TLS; confirm against
   upstream.  */
9181 tls_mentioned_p (rtx x
)
9183 switch (GET_CODE (x
))
9186 return tls_mentioned_p (XEXP (x
, 0));
9189 if (XINT (x
, 1) == UNSPEC_TLS
)
9197 /* Must not copy any rtx that uses a pc-relative address. */
9200 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
9202 if (GET_CODE (*x
) == UNSPEC
9203 && XINT (*x
, 1) == UNSPEC_PIC_BASE
)
9209 arm_cannot_copy_insn_p (rtx insn
)
9211 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
/* NOTE(review): orphaned fragment -- the enclosing function's signature and
   body were dropped by the extraction (upstream line 9217 suggests this is
   the head of minmax_code -- TODO confirm).  Text kept verbatim.  */
9217 enum rtx_code code
= GET_CODE (x
);
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing:
   return-type line, braces, declarations of `reg0'/`reg1'/`val_diff', the
   reg0/reg1 assignments for the plain-REG cases, and the early returns.
   Logic: both addresses must share a base register and differ by exactly
   +/-4; stricter on targets with load delay slots (arm_ld_sched branch).  */
9234 /* Return 1 if memory locations are adjacent. */
9236 adjacent_mem_locations (rtx a
, rtx b
)
9238 /* We don't guarantee to preserve the order of these memory refs. */
9239 if (volatile_refs_p (a
) || volatile_refs_p (b
))
9242 if ((GET_CODE (XEXP (a
, 0)) == REG
9243 || (GET_CODE (XEXP (a
, 0)) == PLUS
9244 && GET_CODE (XEXP (XEXP (a
, 0), 1)) == CONST_INT
))
9245 && (GET_CODE (XEXP (b
, 0)) == REG
9246 || (GET_CODE (XEXP (b
, 0)) == PLUS
9247 && GET_CODE (XEXP (XEXP (b
, 0), 1)) == CONST_INT
)))
9249 HOST_WIDE_INT val0
= 0, val1
= 0;
9253 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
9255 reg0
= XEXP (XEXP (a
, 0), 0);
9256 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
9261 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
9263 reg1
= XEXP (XEXP (b
, 0), 0);
9264 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
9269 /* Don't accept any offset that will require multiple
9270 instructions to handle, since this would cause the
9271 arith_adjacentmem pattern to output an overlong sequence. */
9272 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
9275 /* Don't allow an eliminable register: register elimination can make
9276 the offset too large. */
9277 if (arm_eliminable_register (reg0
))
9280 val_diff
= val1
- val0
;
9284 /* If the target has load delay slots, then there's no benefit
9285 to using an ldm instruction unless the offset is zero and
9286 we are optimizing for size. */
9287 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
9288 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
9289 && (val_diff
== 4 || val_diff
== -4));
9292 return ((REGNO (reg0
) == REGNO (reg1
))
9293 && (val_diff
== 4 || val_diff
== -4));
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing: the
   return-type line, braces, the two `return false' bodies and the final
   `return true'.  Heuristic: separate loads beat ldm on dual-access cores
   (arm_ld_sched) when an add is needed, and on XScale for 1-2 registers.  */
9299 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9300 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9301 instruction. ADD_OFFSET is nonzero if the base address register needs
9302 to be modified with an add instruction before we can use it. */
9305 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
9306 int nops
, HOST_WIDE_INT add_offset
)
9308 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9309 if the offset isn't small enough. The reason 2 ldrs are faster
9310 is because these ARMs are able to do more than one cache access
9311 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9312 whilst the ARM8 has a double bandwidth cache. This means that
9313 these cores can do both an instruction fetch and a data fetch in
9314 a single cycle, so the trick of calculating the address into a
9315 scratch register (one of the result regs) and then doing a load
9316 multiple actually becomes slower (and no smaller in code size).
9317 That is the transformation
9319 ldr rd1, [rbase + offset]
9320 ldr rd2, [rbase + offset + 4]
9324 add rd1, rbase, offset
9325 ldmia rd1, {rd1, rd2}
9327 produces worse code -- '3 cycles + any stalls on rd2' instead of
9328 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9329 access per cycle, the first sequence could never complete in less
9330 than 6 cycles, whereas the ldm sequence would only take 5 and
9331 would make better use of sequential accesses if not hitting the
9334 We cheat here and test 'arm_ld_sched' which we currently know to
9335 only be true for the ARM8, ARM9 and StrongARM. If this ever
9336 changes, then the test below needs to be reworked. */
9337 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
9340 /* XScale has load-store double instructions, but they have stricter
9341 alignment requirements than load-store multiple, so we cannot
9344 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9345 the pipeline until completion.
9353 An ldr instruction takes 1-3 cycles, but does not block the
9362 Best case ldr will always win. However, the more ldr instructions
9363 we issue, the less likely we are to be able to schedule them well.
9364 Using ldr instructions also increases code size.
9366 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9367 for counts of 3 or 4 regs. */
9368 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing:
   return-type line, braces, the body updating order[i] inside the inner
   loop, and the true/false returns.  Contract is fully described by the
   visible header comment.  */
9373 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9374 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9375 an array ORDER which describes the sequence to use when accessing the
9376 offsets that produces an ascending order. In this sequence, each
9377 offset must be larger by exactly 4 than the previous one. ORDER[0]
9378 must have been filled in with the lowest offset by the caller.
9379 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9380 we use to verify that ORDER produces an ascending order of registers.
9381 Return true if it was possible to construct such an order, false if
9385 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
9389 for (i
= 1; i
< nops
; i
++)
9393 order
[i
] = order
[i
- 1];
9394 for (j
= 0; j
< nops
; j
++)
9395 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
9397 /* We must find exactly one offset that is higher than the
9398 previous one by 4. */
9399 if (order
[i
] != order
[i
- 1])
9403 if (order
[i
] == order
[i
- 1])
9405 /* The register numbers must be ascending. */
9406 if (unsorted_regs
!= NULL
9407 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing:
   return-type line, braces, several declarations (i, base_reg, ldm_case),
   the `return 0' exits, the peephole/base-reg-dead guard's first operand,
   and the final `return ldm_case'.  Contract is in the visible header
   comment; ldm_case encodes the addressing variant (1=ia, 2=ib, 3=da,
   4=db, then the add+ldmia fallback).  */
9413 /* Used to determine in a peephole whether a sequence of load
9414 instructions can be changed into a load-multiple instruction.
9415 NOPS is the number of separate load instructions we are examining. The
9416 first NOPS entries in OPERANDS are the destination registers, the
9417 next NOPS entries are memory operands. If this function is
9418 successful, *BASE is set to the common base register of the memory
9419 accesses; *LOAD_OFFSET is set to the first memory location's offset
9420 from that base register.
9421 REGS is an array filled in with the destination register numbers.
9422 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9423 insn numbers to to an ascending order of stores. If CHECK_REGS is true,
9424 the sequence of registers in REGS matches the loads from ascending memory
9425 locations, and the function verifies that the register numbers are
9426 themselves ascending. If CHECK_REGS is false, the register numbers
9427 are stored in the order they are found in the operands. */
9429 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
9430 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
9432 int unsorted_regs
[MAX_LDM_STM_OPS
];
9433 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9434 int order
[MAX_LDM_STM_OPS
];
9435 rtx base_reg_rtx
= NULL
;
9439 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9440 easily extended if required. */
9441 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9443 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9445 /* Loop over the operands and check that the memory references are
9446 suitable (i.e. immediate offsets from the same base register). At
9447 the same time, extract the target register, and the memory
9449 for (i
= 0; i
< nops
; i
++)
9454 /* Convert a subreg of a mem into the mem itself. */
9455 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9456 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9458 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9460 /* Don't reorder volatile memory references; it doesn't seem worth
9461 looking for the case where the order is ok anyway. */
9462 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9465 offset
= const0_rtx
;
9467 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9468 || (GET_CODE (reg
) == SUBREG
9469 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9470 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9471 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9473 || (GET_CODE (reg
) == SUBREG
9474 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9475 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9480 base_reg
= REGNO (reg
);
9482 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9485 else if (base_reg
!= (int) REGNO (reg
))
9486 /* Not addressed from the same base register. */
9489 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9490 ? REGNO (operands
[i
])
9491 : REGNO (SUBREG_REG (operands
[i
])));
9493 /* If it isn't an integer register, or if it overwrites the
9494 base register but isn't the last insn in the list, then
9495 we can't do this. */
9496 if (unsorted_regs
[i
] < 0
9497 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
9498 || unsorted_regs
[i
] > 14
9499 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
9502 unsorted_offsets
[i
] = INTVAL (offset
);
9503 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9507 /* Not a suitable memory address. */
9511 /* All the useful information has now been extracted from the
9512 operands into unsorted_regs and unsorted_offsets; additionally,
9513 order[0] has been set to the lowest offset in the list. Sort
9514 the offsets into order, verifying that they are adjacent, and
9515 check that the register numbers are ascending. */
9516 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
9517 check_regs
? unsorted_regs
: NULL
))
9521 memcpy (saved_order
, order
, sizeof order
);
9527 for (i
= 0; i
< nops
; i
++)
9528 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
9530 *load_offset
= unsorted_offsets
[order
[0]];
9534 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
9537 if (unsorted_offsets
[order
[0]] == 0)
9538 ldm_case
= 1; /* ldmia */
9539 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9540 ldm_case
= 2; /* ldmib */
9541 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9542 ldm_case
= 3; /* ldmda */
9543 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
9544 ldm_case
= 4; /* ldmdb */
9545 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
9546 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
9551 if (!multiple_operation_profitable_p (false, nops
,
9553 ? unsorted_offsets
[order
[0]] : 0))
/* NOTE(review): garbled extraction; code text kept verbatim.  Parallel to
   load_multiple_sequence above, with the same categories of missing lines
   (return type, braces, `return 0' exits, final `return stm_case').
   stm_case encodes the variant: 1=stmia, 2=stmib, 3=stmda, 4=stmdb.  */
9559 /* Used to determine in a peephole whether a sequence of store instructions can
9560 be changed into a store-multiple instruction.
9561 NOPS is the number of separate store instructions we are examining.
9562 NOPS_TOTAL is the total number of instructions recognized by the peephole
9564 The first NOPS entries in OPERANDS are the source registers, the next
9565 NOPS entries are memory operands. If this function is successful, *BASE is
9566 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9567 to the first memory location's offset from that base register. REGS is an
9568 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9569 likewise filled with the corresponding rtx's.
9570 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
9571 numbers to to an ascending order of stores.
9572 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9573 from ascending memory locations, and the function verifies that the register
9574 numbers are themselves ascending. If CHECK_REGS is false, the register
9575 numbers are stored in the order they are found in the operands. */
9577 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
9578 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
9579 HOST_WIDE_INT
*load_offset
, bool check_regs
)
9581 int unsorted_regs
[MAX_LDM_STM_OPS
];
9582 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
9583 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9584 int order
[MAX_LDM_STM_OPS
];
9586 rtx base_reg_rtx
= NULL
;
9589 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9590 easily extended if required. */
9591 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9593 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9595 /* Loop over the operands and check that the memory references are
9596 suitable (i.e. immediate offsets from the same base register). At
9597 the same time, extract the target register, and the memory
9599 for (i
= 0; i
< nops
; i
++)
9604 /* Convert a subreg of a mem into the mem itself. */
9605 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9606 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9608 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9610 /* Don't reorder volatile memory references; it doesn't seem worth
9611 looking for the case where the order is ok anyway. */
9612 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9615 offset
= const0_rtx
;
9617 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9618 || (GET_CODE (reg
) == SUBREG
9619 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9620 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9621 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9623 || (GET_CODE (reg
) == SUBREG
9624 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9625 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9628 unsorted_reg_rtxs
[i
] = (GET_CODE (operands
[i
]) == REG
9629 ? operands
[i
] : SUBREG_REG (operands
[i
]));
9630 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
9634 base_reg
= REGNO (reg
);
9636 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9639 else if (base_reg
!= (int) REGNO (reg
))
9640 /* Not addressed from the same base register. */
9643 /* If it isn't an integer register, then we can't do this. */
9644 if (unsorted_regs
[i
] < 0
9645 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
9646 || (TARGET_THUMB2
&& unsorted_regs
[i
] == base_reg
)
9647 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
9648 || unsorted_regs
[i
] > 14)
9651 unsorted_offsets
[i
] = INTVAL (offset
);
9652 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
9656 /* Not a suitable memory address. */
9660 /* All the useful information has now been extracted from the
9661 operands into unsorted_regs and unsorted_offsets; additionally,
9662 order[0] has been set to the lowest offset in the list. Sort
9663 the offsets into order, verifying that they are adjacent, and
9664 check that the register numbers are ascending. */
9665 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
9666 check_regs
? unsorted_regs
: NULL
))
9670 memcpy (saved_order
, order
, sizeof order
);
9676 for (i
= 0; i
< nops
; i
++)
9678 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
9680 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
9683 *load_offset
= unsorted_offsets
[order
[0]];
9687 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
9690 if (unsorted_offsets
[order
[0]] == 0)
9691 stm_case
= 1; /* stmia */
9692 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9693 stm_case
= 2; /* stmib */
9694 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9695 stm_case
= 3; /* stmda */
9696 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
9697 stm_case
= 4; /* stmdb */
9701 if (!multiple_operation_profitable_p (false, nops
, 0))
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing:
   return-type line, braces, declarations (i, j, result), the emission of
   separate moves fallback's return, and the final `return result'.
   Emits either COUNT individual moves (when not profitable) or builds a
   PARALLEL of SETs, with an optional leading base-register writeback SET.  */
9707 /* Routines for use in generating RTL. */
9709 /* Generate a load-multiple instruction. COUNT is the number of loads in
9710 the instruction; REGS and MEMS are arrays containing the operands.
9711 BASEREG is the base register to be used in addressing the memory operands.
9712 WBACK_OFFSET is nonzero if the instruction should update the base
9716 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
9717 HOST_WIDE_INT wback_offset
)
9722 if (!multiple_operation_profitable_p (false, count
, 0))
9728 for (i
= 0; i
< count
; i
++)
9729 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
9731 if (wback_offset
!= 0)
9732 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
9740 result
= gen_rtx_PARALLEL (VOIDmode
,
9741 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
9742 if (wback_offset
!= 0)
9744 XVECEXP (result
, 0, 0)
9745 = gen_rtx_SET (VOIDmode
, basereg
,
9746 plus_constant (basereg
, wback_offset
));
9751 for (j
= 0; i
< count
; i
++, j
++)
9752 XVECEXP (result
, 0, i
)
9753 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
/* NOTE(review): garbled extraction; code text kept verbatim.  Mirror of
   arm_gen_load_multiple_1 with the SET operands swapped (memory is the
   destination); same categories of missing lines.  */
9758 /* Generate a store-multiple instruction. COUNT is the number of stores in
9759 the instruction; REGS and MEMS are arrays containing the operands.
9760 BASEREG is the base register to be used in addressing the memory operands.
9761 WBACK_OFFSET is nonzero if the instruction should update the base
9765 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
9766 HOST_WIDE_INT wback_offset
)
9771 if (GET_CODE (basereg
) == PLUS
)
9772 basereg
= XEXP (basereg
, 0);
9774 if (!multiple_operation_profitable_p (false, count
, 0))
9780 for (i
= 0; i
< count
; i
++)
9781 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
9783 if (wback_offset
!= 0)
9784 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
9792 result
= gen_rtx_PARALLEL (VOIDmode
,
9793 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
9794 if (wback_offset
!= 0)
9796 XVECEXP (result
, 0, 0)
9797 = gen_rtx_SET (VOIDmode
, basereg
,
9798 plus_constant (basereg
, wback_offset
));
9803 for (j
= 0; i
< count
; i
++, j
++)
9804 XVECEXP (result
, 0, i
)
9805 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
/* NOTE(review): garbled extraction; code text kept verbatim.  Missing:
   return-type line, braces, declaration of `i', and the is_load dispatch
   condition before the two tail calls.  Builds the MEMS array from BASEMEM
   at 4-byte strides, then delegates to the _1 helpers above.  */
9810 /* Generate either a load-multiple or a store-multiple instruction. This
9811 function can be used in situations where we can start with a single MEM
9812 rtx and adjust its address upwards.
9813 COUNT is the number of operations in the instruction, not counting a
9814 possible update of the base register. REGS is an array containing the
9816 BASEREG is the base register to be used in addressing the memory operands,
9817 which are constructed from BASEMEM.
9818 WRITE_BACK specifies whether the generated instruction should include an
9819 update of the base register.
9820 OFFSETP is used to pass an offset to and from this function; this offset
9821 is not used when constructing the address (instead BASEMEM should have an
9822 appropriate offset in its address), it is used only for setting
9823 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
9826 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
9827 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
9829 rtx mems
[MAX_LDM_STM_OPS
];
9830 HOST_WIDE_INT offset
= *offsetp
;
9833 gcc_assert (count
<= MAX_LDM_STM_OPS
);
9835 if (GET_CODE (basereg
) == PLUS
)
9836 basereg
= XEXP (basereg
, 0);
9838 for (i
= 0; i
< count
; i
++)
9840 rtx addr
= plus_constant (basereg
, i
* 4);
9841 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
9849 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
9850 write_back
? 4 * count
: 0);
9852 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
9853 write_back
? 4 * count
: 0);
9857 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
9858 rtx basemem
, HOST_WIDE_INT
*offsetp
)
9860 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
9865 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
9866 rtx basemem
, HOST_WIDE_INT
*offsetp
)
9868 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
9872 /* Called from a peephole2 expander to turn a sequence of loads into an
9873 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9874 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9875 is true if we can reorder the registers because they are used commutatively
9877 Returns true iff we could generate a new instruction. */
9880 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
9882 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
9883 rtx mems
[MAX_LDM_STM_OPS
];
9886 HOST_WIDE_INT offset
;
9887 int write_back
= FALSE
;
9891 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
9892 &base_reg
, &offset
, !sort_regs
);
9898 for (i
= 0; i
< nops
- 1; i
++)
9899 for (j
= i
+ 1; j
< nops
; j
++)
9900 if (regs
[i
] > regs
[j
])
9906 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
9910 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
9911 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
9917 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
9918 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
9923 base_reg_rtx
= newbase
;
9927 for (i
= 0; i
< nops
; i
++)
9929 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
9930 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
9933 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
9934 write_back
? offset
+ i
* 4 : 0));
9938 /* Called from a peephole2 expander to turn a sequence of stores into an
9939 STM instruction. OPERANDS are the operands found by the peephole matcher;
9940 NOPS indicates how many separate stores we are trying to combine.
9941 Returns true iff we could generate a new instruction. */
9944 gen_stm_seq (rtx
*operands
, int nops
)
9947 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
9948 rtx mems
[MAX_LDM_STM_OPS
];
9951 HOST_WIDE_INT offset
;
9952 int write_back
= FALSE
;
9957 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
9958 mem_order
, &base_reg
, &offset
, true);
9963 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
9965 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
9968 gcc_assert (base_reg_dies
);
9974 gcc_assert (base_reg_dies
);
9975 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
9979 addr
= plus_constant (base_reg_rtx
, offset
);
9981 for (i
= 0; i
< nops
; i
++)
9983 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
9984 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
9987 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
9988 write_back
? offset
+ i
* 4 : 0));
9992 /* Called from a peephole2 expander to turn a sequence of stores that are
9993 preceded by constant loads into an STM instruction. OPERANDS are the
9994 operands found by the peephole matcher; NOPS indicates how many
9995 separate stores we are trying to combine; there are 2 * NOPS
9996 instructions in the peephole.
9997 Returns true iff we could generate a new instruction. */
10000 gen_const_stm_seq (rtx
*operands
, int nops
)
10002 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
10003 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10004 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
10005 rtx mems
[MAX_LDM_STM_OPS
];
10008 HOST_WIDE_INT offset
;
10009 int write_back
= FALSE
;
10012 bool base_reg_dies
;
10014 HARD_REG_SET allocated
;
10016 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
10017 mem_order
, &base_reg
, &offset
, false);
10022 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
10024 /* If the same register is used more than once, try to find a free
10026 CLEAR_HARD_REG_SET (allocated
);
10027 for (i
= 0; i
< nops
; i
++)
10029 for (j
= i
+ 1; j
< nops
; j
++)
10030 if (regs
[i
] == regs
[j
])
10032 rtx t
= peep2_find_free_register (0, nops
* 2,
10033 TARGET_THUMB1
? "l" : "r",
10034 SImode
, &allocated
);
10038 regs
[i
] = REGNO (t
);
10042 /* Compute an ordering that maps the register numbers to an ascending
10045 for (i
= 0; i
< nops
; i
++)
10046 if (regs
[i
] < regs
[reg_order
[0]])
10049 for (i
= 1; i
< nops
; i
++)
10051 int this_order
= reg_order
[i
- 1];
10052 for (j
= 0; j
< nops
; j
++)
10053 if (regs
[j
] > regs
[reg_order
[i
- 1]]
10054 && (this_order
== reg_order
[i
- 1]
10055 || regs
[j
] < regs
[this_order
]))
10057 reg_order
[i
] = this_order
;
10060 /* Ensure that registers that must be live after the instruction end
10061 up with the correct value. */
10062 for (i
= 0; i
< nops
; i
++)
10064 int this_order
= reg_order
[i
];
10065 if ((this_order
!= mem_order
[i
]
10066 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
10067 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
10071 /* Load the constants. */
10072 for (i
= 0; i
< nops
; i
++)
10074 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
10075 sorted_regs
[i
] = regs
[reg_order
[i
]];
10076 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
10079 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10081 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
10084 gcc_assert (base_reg_dies
);
10090 gcc_assert (base_reg_dies
);
10091 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10095 addr
= plus_constant (base_reg_rtx
, offset
);
10097 for (i
= 0; i
< nops
; i
++)
10099 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10100 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10103 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
10104 write_back
? offset
+ i
* 4 : 0));
10109 arm_gen_movmemqi (rtx
*operands
)
10111 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
10112 HOST_WIDE_INT srcoffset
, dstoffset
;
10114 rtx src
, dst
, srcbase
, dstbase
;
10115 rtx part_bytes_reg
= NULL
;
10118 if (GET_CODE (operands
[2]) != CONST_INT
10119 || GET_CODE (operands
[3]) != CONST_INT
10120 || INTVAL (operands
[2]) > 64
10121 || INTVAL (operands
[3]) & 3)
10124 dstbase
= operands
[0];
10125 srcbase
= operands
[1];
10127 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
10128 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
10130 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
10131 out_words_to_go
= INTVAL (operands
[2]) / 4;
10132 last_bytes
= INTVAL (operands
[2]) & 3;
10133 dstoffset
= srcoffset
= 0;
10135 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
10136 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
10138 for (i
= 0; in_words_to_go
>= 2; i
+=4)
10140 if (in_words_to_go
> 4)
10141 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
10142 TRUE
, srcbase
, &srcoffset
));
10144 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
10145 src
, FALSE
, srcbase
,
10148 if (out_words_to_go
)
10150 if (out_words_to_go
> 4)
10151 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
10152 TRUE
, dstbase
, &dstoffset
));
10153 else if (out_words_to_go
!= 1)
10154 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
10155 out_words_to_go
, dst
,
10158 dstbase
, &dstoffset
));
10161 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10162 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
10163 if (last_bytes
!= 0)
10165 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
10171 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
10172 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
10175 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10176 if (out_words_to_go
)
10180 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10181 sreg
= copy_to_reg (mem
);
10183 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10184 emit_move_insn (mem
, sreg
);
10187 gcc_assert (!in_words_to_go
); /* Sanity check */
10190 if (in_words_to_go
)
10192 gcc_assert (in_words_to_go
> 0);
10194 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10195 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
10198 gcc_assert (!last_bytes
|| part_bytes_reg
);
10200 if (BYTES_BIG_ENDIAN
&& last_bytes
)
10202 rtx tmp
= gen_reg_rtx (SImode
);
10204 /* The bytes we want are in the top end of the word. */
10205 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
10206 GEN_INT (8 * (4 - last_bytes
))));
10207 part_bytes_reg
= tmp
;
10211 mem
= adjust_automodify_address (dstbase
, QImode
,
10212 plus_constant (dst
, last_bytes
- 1),
10213 dstoffset
+ last_bytes
- 1);
10214 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10218 tmp
= gen_reg_rtx (SImode
);
10219 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
10220 part_bytes_reg
= tmp
;
10227 if (last_bytes
> 1)
10229 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
10230 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
10234 rtx tmp
= gen_reg_rtx (SImode
);
10235 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
10236 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
10237 part_bytes_reg
= tmp
;
10244 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
10245 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10252 /* Select a dominance comparison mode if possible for a test of the general
10253 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10254 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10255 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10256 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10257 In all cases OP will be either EQ or NE, but we don't need to know which
10258 here. If we are unable to support a dominance comparison we return
10259 CC mode. This will then fail to match for the RTL expressions that
10260 generate this call. */
10262 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
10264 enum rtx_code cond1
, cond2
;
10267 /* Currently we will probably get the wrong result if the individual
10268 comparisons are not simple. This also ensures that it is safe to
10269 reverse a comparison if necessary. */
10270 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
10272 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
10276 /* The if_then_else variant of this tests the second condition if the
10277 first passes, but is true if the first fails. Reverse the first
10278 condition to get a true "inclusive-or" expression. */
10279 if (cond_or
== DOM_CC_NX_OR_Y
)
10280 cond1
= reverse_condition (cond1
);
10282 /* If the comparisons are not equal, and one doesn't dominate the other,
10283 then we can't do this. */
10285 && !comparison_dominates_p (cond1
, cond2
)
10286 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
10291 enum rtx_code temp
= cond1
;
10299 if (cond_or
== DOM_CC_X_AND_Y
)
10304 case EQ
: return CC_DEQmode
;
10305 case LE
: return CC_DLEmode
;
10306 case LEU
: return CC_DLEUmode
;
10307 case GE
: return CC_DGEmode
;
10308 case GEU
: return CC_DGEUmode
;
10309 default: gcc_unreachable ();
10313 if (cond_or
== DOM_CC_X_AND_Y
)
10325 gcc_unreachable ();
10329 if (cond_or
== DOM_CC_X_AND_Y
)
10341 gcc_unreachable ();
10345 if (cond_or
== DOM_CC_X_AND_Y
)
10346 return CC_DLTUmode
;
10351 return CC_DLTUmode
;
10353 return CC_DLEUmode
;
10357 gcc_unreachable ();
10361 if (cond_or
== DOM_CC_X_AND_Y
)
10362 return CC_DGTUmode
;
10367 return CC_DGTUmode
;
10369 return CC_DGEUmode
;
10373 gcc_unreachable ();
10376 /* The remaining cases only occur when both comparisons are the
10379 gcc_assert (cond1
== cond2
);
10383 gcc_assert (cond1
== cond2
);
10387 gcc_assert (cond1
== cond2
);
10391 gcc_assert (cond1
== cond2
);
10392 return CC_DLEUmode
;
10395 gcc_assert (cond1
== cond2
);
10396 return CC_DGEUmode
;
10399 gcc_unreachable ();
10404 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
10406 /* All floating point compares return CCFP if it is an equality
10407 comparison, and CCFPE otherwise. */
10408 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
10428 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10433 gcc_unreachable ();
10437 /* A compare with a shifted operand. Because of canonicalization, the
10438 comparison will have to be swapped when we emit the assembler. */
10439 if (GET_MODE (y
) == SImode
10440 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10441 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10442 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
10443 || GET_CODE (x
) == ROTATERT
))
10446 /* This operation is performed swapped, but since we only rely on the Z
10447 flag we don't need an additional mode. */
10448 if (GET_MODE (y
) == SImode
10449 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10450 && GET_CODE (x
) == NEG
10451 && (op
== EQ
|| op
== NE
))
10454 /* This is a special case that is used by combine to allow a
10455 comparison of a shifted byte load to be split into a zero-extend
10456 followed by a comparison of the shifted integer (only valid for
10457 equalities and unsigned inequalities). */
10458 if (GET_MODE (x
) == SImode
10459 && GET_CODE (x
) == ASHIFT
10460 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
10461 && GET_CODE (XEXP (x
, 0)) == SUBREG
10462 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
10463 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
10464 && (op
== EQ
|| op
== NE
10465 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
10466 && GET_CODE (y
) == CONST_INT
)
10469 /* A construct for a conditional compare, if the false arm contains
10470 0, then both conditions must be true, otherwise either condition
10471 must be true. Not all conditions are possible, so CCmode is
10472 returned if it can't be done. */
10473 if (GET_CODE (x
) == IF_THEN_ELSE
10474 && (XEXP (x
, 2) == const0_rtx
10475 || XEXP (x
, 2) == const1_rtx
)
10476 && COMPARISON_P (XEXP (x
, 0))
10477 && COMPARISON_P (XEXP (x
, 1)))
10478 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10479 INTVAL (XEXP (x
, 2)));
10481 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10482 if (GET_CODE (x
) == AND
10483 && COMPARISON_P (XEXP (x
, 0))
10484 && COMPARISON_P (XEXP (x
, 1)))
10485 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10488 if (GET_CODE (x
) == IOR
10489 && COMPARISON_P (XEXP (x
, 0))
10490 && COMPARISON_P (XEXP (x
, 1)))
10491 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10494 /* An operation (on Thumb) where we want to test for a single bit.
10495 This is done by shifting that bit up into the top bit of a
10496 scratch register; we can then branch on the sign bit. */
10498 && GET_MODE (x
) == SImode
10499 && (op
== EQ
|| op
== NE
)
10500 && GET_CODE (x
) == ZERO_EXTRACT
10501 && XEXP (x
, 1) == const1_rtx
)
10504 /* An operation that sets the condition codes as a side-effect, the
10505 V flag is not set correctly, so we can only use comparisons where
10506 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10508 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10509 if (GET_MODE (x
) == SImode
10511 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
10512 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
10513 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
10514 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
10515 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
10516 || GET_CODE (x
) == LSHIFTRT
10517 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10518 || GET_CODE (x
) == ROTATERT
10519 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
10520 return CC_NOOVmode
;
10522 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
10525 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
10526 && GET_CODE (x
) == PLUS
10527 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
10530 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
10532 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10534 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10541 /* A DImode comparison against zero can be implemented by
10542 or'ing the two halves together. */
10543 if (y
== const0_rtx
)
10546 /* We can do an equality test in three Thumb instructions. */
10556 /* DImode unsigned comparisons can be implemented by cmp +
10557 cmpeq without a scratch register. Not worth doing in
10568 /* DImode signed and unsigned comparisons can be implemented
10569 by cmp + sbcs with a scratch register, but that does not
10570 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10571 gcc_assert (op
!= EQ
&& op
!= NE
);
10575 gcc_unreachable ();
10582 /* X and Y are two things to compare using CODE. Emit the compare insn and
10583 return the rtx for register 0 in the proper mode. FP means this is a
10584 floating point compare: I don't think that it is needed on the arm. */
10586 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
)
10588 enum machine_mode mode
;
10590 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
10592 /* We might have X as a constant, Y as a register because of the predicates
10593 used for cmpdi. If so, force X to a register here. */
10594 if (dimode_comparison
&& !REG_P (x
))
10595 x
= force_reg (DImode
, x
);
10597 mode
= SELECT_CC_MODE (code
, x
, y
);
10598 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
10600 if (dimode_comparison
10601 && !(TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10602 && mode
!= CC_CZmode
)
10606 /* To compare two non-zero values for equality, XOR them and
10607 then compare against zero. Not used for ARM mode; there
10608 CC_CZmode is cheaper. */
10609 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
10611 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
10614 /* A scratch register is required. */
10615 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (SImode
));
10616 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
10617 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
10620 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
10625 /* Generate a sequence of insns that will generate the correct return
10626 address mask depending on the physical architecture that the program
10629 arm_gen_return_addr_mask (void)
10631 rtx reg
= gen_reg_rtx (Pmode
);
10633 emit_insn (gen_return_addr_mask (reg
));
10638 arm_reload_in_hi (rtx
*operands
)
10640 rtx ref
= operands
[1];
10642 HOST_WIDE_INT offset
= 0;
10644 if (GET_CODE (ref
) == SUBREG
)
10646 offset
= SUBREG_BYTE (ref
);
10647 ref
= SUBREG_REG (ref
);
10650 if (GET_CODE (ref
) == REG
)
10652 /* We have a pseudo which has been spilt onto the stack; there
10653 are two cases here: the first where there is a simple
10654 stack-slot replacement and a second where the stack-slot is
10655 out of range, or is used as a subreg. */
10656 if (reg_equiv_mem
[REGNO (ref
)])
10658 ref
= reg_equiv_mem
[REGNO (ref
)];
10659 base
= find_replacement (&XEXP (ref
, 0));
10662 /* The slot is out of range, or was dressed up in a SUBREG. */
10663 base
= reg_equiv_address
[REGNO (ref
)];
10666 base
= find_replacement (&XEXP (ref
, 0));
10668 /* Handle the case where the address is too complex to be offset by 1. */
10669 if (GET_CODE (base
) == MINUS
10670 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10672 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10674 emit_set_insn (base_plus
, base
);
10677 else if (GET_CODE (base
) == PLUS
)
10679 /* The addend must be CONST_INT, or we would have dealt with it above. */
10680 HOST_WIDE_INT hi
, lo
;
10682 offset
+= INTVAL (XEXP (base
, 1));
10683 base
= XEXP (base
, 0);
10685 /* Rework the address into a legal sequence of insns. */
10686 /* Valid range for lo is -4095 -> 4095 */
10689 : -((-offset
) & 0xfff));
10691 /* Corner case, if lo is the max offset then we would be out of range
10692 once we have added the additional 1 below, so bump the msb into the
10693 pre-loading insn(s). */
10697 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10698 ^ (HOST_WIDE_INT
) 0x80000000)
10699 - (HOST_WIDE_INT
) 0x80000000);
10701 gcc_assert (hi
+ lo
== offset
);
10705 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10707 /* Get the base address; addsi3 knows how to handle constants
10708 that require more than one insn. */
10709 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10715 /* Operands[2] may overlap operands[0] (though it won't overlap
10716 operands[1]), that's why we asked for a DImode reg -- so we can
10717 use the bit that does not overlap. */
10718 if (REGNO (operands
[2]) == REGNO (operands
[0]))
10719 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10721 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10723 emit_insn (gen_zero_extendqisi2 (scratch
,
10724 gen_rtx_MEM (QImode
,
10725 plus_constant (base
,
10727 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10728 gen_rtx_MEM (QImode
,
10729 plus_constant (base
,
10731 if (!BYTES_BIG_ENDIAN
)
10732 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10733 gen_rtx_IOR (SImode
,
10736 gen_rtx_SUBREG (SImode
, operands
[0], 0),
10740 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10741 gen_rtx_IOR (SImode
,
10742 gen_rtx_ASHIFT (SImode
, scratch
,
10744 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
10747 /* Handle storing a half-word to memory during reload by synthesizing as two
10748 byte stores. Take care not to clobber the input values until after we
10749 have moved them somewhere safe. This code assumes that if the DImode
10750 scratch in operands[2] overlaps either the input value or output address
10751 in some way, then that value must die in this insn (we absolutely need
10752 two scratch registers for some corner cases). */
10754 arm_reload_out_hi (rtx
*operands
)
10756 rtx ref
= operands
[0];
10757 rtx outval
= operands
[1];
10759 HOST_WIDE_INT offset
= 0;
10761 if (GET_CODE (ref
) == SUBREG
)
10763 offset
= SUBREG_BYTE (ref
);
10764 ref
= SUBREG_REG (ref
);
10767 if (GET_CODE (ref
) == REG
)
10769 /* We have a pseudo which has been spilt onto the stack; there
10770 are two cases here: the first where there is a simple
10771 stack-slot replacement and a second where the stack-slot is
10772 out of range, or is used as a subreg. */
10773 if (reg_equiv_mem
[REGNO (ref
)])
10775 ref
= reg_equiv_mem
[REGNO (ref
)];
10776 base
= find_replacement (&XEXP (ref
, 0));
10779 /* The slot is out of range, or was dressed up in a SUBREG. */
10780 base
= reg_equiv_address
[REGNO (ref
)];
10783 base
= find_replacement (&XEXP (ref
, 0));
10785 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10787 /* Handle the case where the address is too complex to be offset by 1. */
10788 if (GET_CODE (base
) == MINUS
10789 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10791 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10793 /* Be careful not to destroy OUTVAL. */
10794 if (reg_overlap_mentioned_p (base_plus
, outval
))
10796 /* Updating base_plus might destroy outval, see if we can
10797 swap the scratch and base_plus. */
10798 if (!reg_overlap_mentioned_p (scratch
, outval
))
10801 scratch
= base_plus
;
10806 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10808 /* Be conservative and copy OUTVAL into the scratch now,
10809 this should only be necessary if outval is a subreg
10810 of something larger than a word. */
10811 /* XXX Might this clobber base? I can't see how it can,
10812 since scratch is known to overlap with OUTVAL, and
10813 must be wider than a word. */
10814 emit_insn (gen_movhi (scratch_hi
, outval
));
10815 outval
= scratch_hi
;
10819 emit_set_insn (base_plus
, base
);
10822 else if (GET_CODE (base
) == PLUS
)
10824 /* The addend must be CONST_INT, or we would have dealt with it above. */
10825 HOST_WIDE_INT hi
, lo
;
10827 offset
+= INTVAL (XEXP (base
, 1));
10828 base
= XEXP (base
, 0);
10830 /* Rework the address into a legal sequence of insns. */
10831 /* Valid range for lo is -4095 -> 4095 */
10834 : -((-offset
) & 0xfff));
10836 /* Corner case, if lo is the max offset then we would be out of range
10837 once we have added the additional 1 below, so bump the msb into the
10838 pre-loading insn(s). */
10842 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10843 ^ (HOST_WIDE_INT
) 0x80000000)
10844 - (HOST_WIDE_INT
) 0x80000000);
10846 gcc_assert (hi
+ lo
== offset
);
10850 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10852 /* Be careful not to destroy OUTVAL. */
10853 if (reg_overlap_mentioned_p (base_plus
, outval
))
10855 /* Updating base_plus might destroy outval, see if we
10856 can swap the scratch and base_plus. */
10857 if (!reg_overlap_mentioned_p (scratch
, outval
))
10860 scratch
= base_plus
;
10865 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10867 /* Be conservative and copy outval into scratch now,
10868 this should only be necessary if outval is a
10869 subreg of something larger than a word. */
10870 /* XXX Might this clobber base? I can't see how it
10871 can, since scratch is known to overlap with
10873 emit_insn (gen_movhi (scratch_hi
, outval
));
10874 outval
= scratch_hi
;
10878 /* Get the base address; addsi3 knows how to handle constants
10879 that require more than one insn. */
10880 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10886 if (BYTES_BIG_ENDIAN
)
10888 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
10889 plus_constant (base
, offset
+ 1)),
10890 gen_lowpart (QImode
, outval
)));
10891 emit_insn (gen_lshrsi3 (scratch
,
10892 gen_rtx_SUBREG (SImode
, outval
, 0),
10894 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
10895 gen_lowpart (QImode
, scratch
)));
10899 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
10900 gen_lowpart (QImode
, outval
)));
10901 emit_insn (gen_lshrsi3 (scratch
,
10902 gen_rtx_SUBREG (SImode
, outval
, 0),
10904 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
10905 plus_constant (base
, offset
+ 1)),
10906 gen_lowpart (QImode
, scratch
)));
10910 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
10911 (padded to the size of a word) should be passed in a register. */
10914 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
10916 if (TARGET_AAPCS_BASED
)
10917 return must_pass_in_stack_var_size (mode
, type
);
10919 return must_pass_in_stack_var_size_or_pad (mode
, type
);
10923 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
10924 Return true if an argument passed on the stack should be padded upwards,
10925 i.e. if the least-significant byte has useful data.
10926 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
10927 aggregate types are placed in the lowest memory address. */
10930 arm_pad_arg_upward (enum machine_mode mode
, const_tree type
)
10932 if (!TARGET_AAPCS_BASED
)
10933 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
10935 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
10942 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
10943 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
10944 byte of the register has useful data, and return the opposite if the
10945 most significant byte does.
10946 For AAPCS, small aggregates and small complex types are always padded
10950 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
,
10951 tree type
, int first ATTRIBUTE_UNUSED
)
10953 if (TARGET_AAPCS_BASED
10954 && BYTES_BIG_ENDIAN
10955 && (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
)
10956 && int_size_in_bytes (type
) <= 4)
10959 /* Otherwise, use default padding. */
10960 return !BYTES_BIG_ENDIAN
;
10964 /* Print a symbolic form of X to the debug file, F. */
10966 arm_print_value (FILE *f
, rtx x
)
10968 switch (GET_CODE (x
))
10971 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
10975 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
10983 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
10985 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
10986 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
10994 fprintf (f
, "\"%s\"", XSTR (x
, 0));
10998 fprintf (f
, "`%s'", XSTR (x
, 0));
11002 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
11006 arm_print_value (f
, XEXP (x
, 0));
11010 arm_print_value (f
, XEXP (x
, 0));
11012 arm_print_value (f
, XEXP (x
, 1));
11020 fprintf (f
, "????");
11025 /* Routines for manipulation of the constant pool. */
11027 /* Arm instructions cannot load a large constant directly into a
11028 register; they have to come from a pc relative load. The constant
11029 must therefore be placed in the addressable range of the pc
11030 relative load. Depending on the precise pc relative load
11031 instruction the range is somewhere between 256 bytes and 4k. This
11032 means that we often have to dump a constant inside a function, and
11033 generate code to branch around it.
11035 It is important to minimize this, since the branches will slow
11036 things down and make the code larger.
11038 Normally we can hide the table after an existing unconditional
11039 branch so that there is no interruption of the flow, but in the
11040 worst case the code looks like this:
11058 We fix this by performing a scan after scheduling, which notices
11059 which instructions need to have their operands fetched from the
11060 constant table and builds the table.
11062 The algorithm starts by building a table of all the constants that
11063 need fixing up and all the natural barriers in the function (places
11064 where a constant table can be dropped without breaking the flow).
11065 For each fixup we note how far the pc-relative replacement will be
11066 able to reach and the offset of the instruction into the function.
11068 Having built the table we then group the fixes together to form
11069 tables that are as large as possible (subject to addressing
11070 constraints) and emit each table of constants after the last
11071 barrier that is within range of all the instructions in the group.
11072 If a group does not contain a barrier, then we forcibly create one
11073 by inserting a jump instruction into the flow. Once the table has
11074 been inserted, the insns are then modified to reference the
11075 relevant entry in the pool.
11077 Possible enhancements to the algorithm (not implemented) are:
11079 1) For some processors and object formats, there may be benefit in
11080 aligning the pools to the start of cache lines; this alignment
11081 would need to be taken into account when calculating addressability
11084 /* These typedefs are located at the start of this file, so that
11085 they can be used in the prototypes there. This comment is to
11086 remind readers of that fact so that the following structures
11087 can be understood more easily.
11089 typedef struct minipool_node Mnode;
11090 typedef struct minipool_fixup Mfix; */
/* One entry in the minipool (a constant placed in the instruction
   stream).  NOTE(review): this extract is lossy -- the doubly-linked
   next/prev pointers, the refcount, the `value' rtx and the fix_size
   field (all referenced by the code below) are missing here, and the
   upstream line numbers are fused into the text.  Restore from the
   canonical source before compiling.  */
11092 struct minipool_node
11094 /* Doubly linked chain of entries. */
11097 /* The maximum offset into the code that this entry can be placed. While
11098 pushing fixes for forward references, all entries are sorted in order
11099 of increasing max_address. */
11100 HOST_WIDE_INT max_address
;
11101 /* Similarly for an entry inserted for a backwards ref. */
11102 HOST_WIDE_INT min_address
;
11103 /* The number of fixes referencing this entry. This can become zero
11104 if we "unpush" an entry. In this case we ignore the entry when we
11105 come to emit the code. */
11107 /* The offset from the start of the minipool. */
11108 HOST_WIDE_INT offset
;
11109 /* The value in table. */
11111 /* The mode of value. */
11112 enum machine_mode mode
;
11113 /* The size of the value. With iWMMXt enabled
11114 sizes > 4 also imply an alignment of 8-bytes. */
/* One instruction that needs a constant-pool (minipool) fixup.
   NOTE(review): lossy extract -- the next pointer, insn, loc, value
   and minipool fields used by push_minipool_fix() below are missing
   from this copy; recover from the canonical source.  */
11118 struct minipool_fixup
11122 HOST_WIDE_INT address
;
11124 enum machine_mode mode
;
11128 HOST_WIDE_INT forwards
;
11129 HOST_WIDE_INT backwards
;
/* Size in bytes a fix occupies in the pool: values narrower than a
   word are padded up to 4 bytes.  */
11132 /* Fixes less than a word need padding out to a word boundary. */
11133 #define MINIPOOL_FIX_SIZE(mode) \
11134 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
/* File-scope state for the current minipool: head/tail of the sorted
   entry list, the label that addresses the pool, and padding.
   NOTE(review): minipool_fix_head/tail/barrier appear without `static'
   in this extract -- presumably they are static upstream; confirm
   before relying on linkage.  */
11136 static Mnode
* minipool_vector_head
;
11137 static Mnode
* minipool_vector_tail
;
11138 static rtx minipool_vector_label
;
11139 static int minipool_pad
;
11141 /* The linked list of all minipool fixes required for this function. */
11142 Mfix
* minipool_fix_head
;
11143 Mfix
* minipool_fix_tail
;
11144 /* The fix entry for the current minipool, once it has been placed. */
11145 Mfix
* minipool_barrier
;
/* NOTE(review): lossy extract -- the return type, the declaration of
   `table', the function braces and both return statements are missing
   from this copy.  The test matches a JUMP_INSN whose label's next
   real insn is an ADDR_VEC/ADDR_DIFF_VEC dispatch table.  */
11147 /* Determines if INSN is the start of a jump table. Returns the end
11148 of the TABLE or NULL_RTX. */
11150 is_jump_table (rtx insn
)
11154 if (GET_CODE (insn
) == JUMP_INSN
11155 && JUMP_LABEL (insn
) != NULL
11156 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
11157 == next_real_insn (insn
))
11159 && GET_CODE (table
) == JUMP_INSN
11160 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
11161 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
/* Default: jump tables live in the data/rodata section, not text.
   NOTE(review): the matching #endif is missing from this extract.  */
11167 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11168 #define JUMP_TABLES_IN_TEXT_SECTION 0
/* Return the size in bytes that the jump table INSN occupies in the
   text section (0 when tables are emitted elsewhere).
   NOTE(review): lossy extract -- the TBB/TBH/Thumb branches, several
   returns and the closing braces are missing from this copy.  */
11171 static HOST_WIDE_INT
11172 get_jump_table_size (rtx insn
)
11174 /* ADDR_VECs only take room if read-only data goes into the text
11176 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
11178 rtx body
= PATTERN (insn
);
11179 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
11180 HOST_WIDE_INT size
;
11181 HOST_WIDE_INT modesize
;
11183 modesize
= GET_MODE_SIZE (GET_MODE (body
));
11184 size
= modesize
* XVECLEN (body
, elt
);
11188 /* Round up size of TBB table to a halfword boundary. */
11189 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
11192 /* No padding necessary for TBH. */
11195 /* Add two bytes for alignment on Thumb. */
11200 gcc_unreachable ();
/* NOTE(review): lossy extract -- return type, several braces,
   if/else keywords and the trailing `return mp;' are missing from
   this copy.  The visible logic: tighten mp->max_address, unlink MP
   from the doubly-linked pool list, re-insert it before MAX_MP, then
   propagate the tightened constraint to preceding entries.  */
11208 /* Move a minipool fix MP from its current location to before MAX_MP.
11209 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11210 constraints may need updating. */
11212 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
11213 HOST_WIDE_INT max_address
)
11215 /* The code below assumes these are different. */
11216 gcc_assert (mp
!= max_mp
);
11218 if (max_mp
== NULL
)
11220 if (max_address
< mp
->max_address
)
11221 mp
->max_address
= max_address
;
11225 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11226 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11228 mp
->max_address
= max_address
;
11230 /* Unlink MP from its current position. Since max_mp is non-null,
11231 mp->prev must be non-null. */
11232 mp
->prev
->next
= mp
->next
;
11233 if (mp
->next
!= NULL
)
11234 mp
->next
->prev
= mp
->prev
;
11236 minipool_vector_tail
= mp
->prev
;
11238 /* Re-insert it before MAX_MP. */
11240 mp
->prev
= max_mp
->prev
;
11243 if (mp
->prev
!= NULL
)
11244 mp
->prev
->next
= mp
;
11246 minipool_vector_head
= mp
;
11249 /* Save the new entry. */
11252 /* Scan over the preceding entries and adjust their addresses as
11254 while (mp
->prev
!= NULL
11255 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11257 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
/* NOTE(review): lossy extract -- the declaration of `mp', the
   allocation of the new node, refcount updates, several braces and
   the final `return mp;' are missing from this copy.  Visible logic:
   reuse an existing pool entry with an equal value/mode (delegating
   to move_minipool_fix_forward_ref), otherwise build a new entry and
   link it at the position dictated by max_mp, keeping entries sorted
   by increasing max_address.  */
11264 /* Add a constant to the minipool for a forward reference. Returns the
11265 node added or NULL if the constant will not fit in this pool. */
11267 add_minipool_forward_ref (Mfix
*fix
)
11269 /* If set, max_mp is the first pool_entry that has a lower
11270 constraint than the one we are trying to add. */
11271 Mnode
* max_mp
= NULL
;
11272 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
11275 /* If the minipool starts before the end of FIX->INSN then this FIX
11276 can not be placed into the current pool. Furthermore, adding the
11277 new constant pool entry may cause the pool to start FIX_SIZE bytes
11279 if (minipool_vector_head
&&
11280 (fix
->address
+ get_attr_length (fix
->insn
)
11281 >= minipool_vector_head
->max_address
- fix
->fix_size
))
11284 /* Scan the pool to see if a constant with the same value has
11285 already been added. While we are doing this, also note the
11286 location where we must insert the constant if it doesn't already
11288 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11290 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11291 && fix
->mode
== mp
->mode
11292 && (GET_CODE (fix
->value
) != CODE_LABEL
11293 || (CODE_LABEL_NUMBER (fix
->value
)
11294 == CODE_LABEL_NUMBER (mp
->value
)))
11295 && rtx_equal_p (fix
->value
, mp
->value
))
11297 /* More than one fix references this entry. */
11299 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
11302 /* Note the insertion point if necessary. */
11304 && mp
->max_address
> max_address
)
11307 /* If we are inserting an 8-bytes aligned quantity and
11308 we have not already found an insertion point, then
11309 make sure that all such 8-byte aligned quantities are
11310 placed at the start of the pool. */
11311 if (ARM_DOUBLEWORD_ALIGN
11313 && fix
->fix_size
>= 8
11314 && mp
->fix_size
< 8)
11317 max_address
= mp
->max_address
;
11321 /* The value is not currently in the minipool, so we need to create
11322 a new entry for it. If MAX_MP is NULL, the entry will be put on
11323 the end of the list since the placement is less constrained than
11324 any existing entry. Otherwise, we insert the new fix before
11325 MAX_MP and, if necessary, adjust the constraints on the other
11328 mp
->fix_size
= fix
->fix_size
;
11329 mp
->mode
= fix
->mode
;
11330 mp
->value
= fix
->value
;
11332 /* Not yet required for a backwards ref. */
11333 mp
->min_address
= -65536;
11335 if (max_mp
== NULL
)
11337 mp
->max_address
= max_address
;
11339 mp
->prev
= minipool_vector_tail
;
11341 if (mp
->prev
== NULL
)
11343 minipool_vector_head
= mp
;
11344 minipool_vector_label
= gen_label_rtx ();
11347 mp
->prev
->next
= mp
;
11349 minipool_vector_tail
= mp
;
11353 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11354 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11356 mp
->max_address
= max_address
;
11359 mp
->prev
= max_mp
->prev
;
11361 if (mp
->prev
!= NULL
)
11362 mp
->prev
->next
= mp
;
11364 minipool_vector_head
= mp
;
11367 /* Save the new entry. */
11370 /* Scan over the preceding entries and adjust their addresses as
11372 while (mp
->prev
!= NULL
11373 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11375 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
/* Mirror of move_minipool_fix_forward_ref for backward references:
   relocate MP after MIN_MP and renumber offsets/min_address over the
   whole pool.  NOTE(review): lossy extract -- the return type, header
   comment, braces and `return mp;' are missing from this copy.  */
11383 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
11384 HOST_WIDE_INT min_address
)
11386 HOST_WIDE_INT offset
;
11388 /* The code below assumes these are different. */
11389 gcc_assert (mp
!= min_mp
);
11391 if (min_mp
== NULL
)
11393 if (min_address
> mp
->min_address
)
11394 mp
->min_address
= min_address
;
11398 /* We will adjust this below if it is too loose. */
11399 mp
->min_address
= min_address
;
11401 /* Unlink MP from its current position. Since min_mp is non-null,
11402 mp->next must be non-null. */
11403 mp
->next
->prev
= mp
->prev
;
11404 if (mp
->prev
!= NULL
)
11405 mp
->prev
->next
= mp
->next
;
11407 minipool_vector_head
= mp
->next
;
11409 /* Reinsert it after MIN_MP. */
11411 mp
->next
= min_mp
->next
;
11413 if (mp
->next
!= NULL
)
11414 mp
->next
->prev
= mp
;
11416 minipool_vector_tail
= mp
;
11422 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11424 mp
->offset
= offset
;
11425 if (mp
->refcount
> 0)
11426 offset
+= mp
->fix_size
;
11428 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11429 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
/* NOTE(review): lossy extract -- the declaration of `mp', node
   allocation, refcount handling, early `return NULL', several braces
   and the final `return mp;' are missing from this copy.  Visible
   logic mirrors add_minipool_forward_ref but scans the pool from the
   tail and inserts relative to min_mp/min_address.  */
11435 /* Add a constant to the minipool for a backward reference. Returns the
11436 node added or NULL if the constant will not fit in this pool.
11438 Note that the code for insertion for a backwards reference can be
11439 somewhat confusing because the calculated offsets for each fix do
11440 not take into account the size of the pool (which is still under
11443 add_minipool_backward_ref (Mfix
*fix
)
11445 /* If set, min_mp is the last pool_entry that has a lower constraint
11446 than the one we are trying to add. */
11447 Mnode
*min_mp
= NULL
;
11448 /* This can be negative, since it is only a constraint. */
11449 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
11452 /* If we can't reach the current pool from this insn, or if we can't
11453 insert this entry at the end of the pool without pushing other
11454 fixes out of range, then we don't try. This ensures that we
11455 can't fail later on. */
11456 if (min_address
>= minipool_barrier
->address
11457 || (minipool_vector_tail
->min_address
+ fix
->fix_size
11458 >= minipool_barrier
->address
))
11461 /* Scan the pool to see if a constant with the same value has
11462 already been added. While we are doing this, also note the
11463 location where we must insert the constant if it doesn't already
11465 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
11467 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11468 && fix
->mode
== mp
->mode
11469 && (GET_CODE (fix
->value
) != CODE_LABEL
11470 || (CODE_LABEL_NUMBER (fix
->value
)
11471 == CODE_LABEL_NUMBER (mp
->value
)))
11472 && rtx_equal_p (fix
->value
, mp
->value
)
11473 /* Check that there is enough slack to move this entry to the
11474 end of the table (this is conservative). */
11475 && (mp
->max_address
11476 > (minipool_barrier
->address
11477 + minipool_vector_tail
->offset
11478 + minipool_vector_tail
->fix_size
)))
11481 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
11484 if (min_mp
!= NULL
)
11485 mp
->min_address
+= fix
->fix_size
;
11488 /* Note the insertion point if necessary. */
11489 if (mp
->min_address
< min_address
)
11491 /* For now, we do not allow the insertion of 8-byte alignment
11492 requiring nodes anywhere but at the start of the pool. */
11493 if (ARM_DOUBLEWORD_ALIGN
11494 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
11499 else if (mp
->max_address
11500 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
11502 /* Inserting before this entry would push the fix beyond
11503 its maximum address (which can happen if we have
11504 re-located a forwards fix); force the new fix to come
11506 if (ARM_DOUBLEWORD_ALIGN
11507 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
11512 min_address
= mp
->min_address
+ fix
->fix_size
;
11515 /* Do not insert a non-8-byte aligned quantity before 8-byte
11516 aligned quantities. */
11517 else if (ARM_DOUBLEWORD_ALIGN
11518 && fix
->fix_size
< 8
11519 && mp
->fix_size
>= 8)
11522 min_address
= mp
->min_address
+ fix
->fix_size
;
11527 /* We need to create a new entry. */
11529 mp
->fix_size
= fix
->fix_size
;
11530 mp
->mode
= fix
->mode
;
11531 mp
->value
= fix
->value
;
11533 mp
->max_address
= minipool_barrier
->address
+ 65536;
11535 mp
->min_address
= min_address
;
11537 if (min_mp
== NULL
)
11540 mp
->next
= minipool_vector_head
;
11542 if (mp
->next
== NULL
)
11544 minipool_vector_tail
= mp
;
11545 minipool_vector_label
= gen_label_rtx ();
11548 mp
->next
->prev
= mp
;
11550 minipool_vector_head
= mp
;
11554 mp
->next
= min_mp
->next
;
11558 if (mp
->next
!= NULL
)
11559 mp
->next
->prev
= mp
;
11561 minipool_vector_tail
= mp
;
11564 /* Save the new entry. */
11572 /* Scan over the following entries and adjust their offsets. */
11573 while (mp
->next
!= NULL
)
11575 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11576 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11579 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
11581 mp
->next
->offset
= mp
->offset
;
/* Record BARRIER as the barrier for the current pool and assign each
   live (refcount > 0) entry its byte offset from the pool start.
   NOTE(review): lossy extract -- the return type, the declaration of
   `mp' and the function braces are missing from this copy.  */
11590 assign_minipool_offsets (Mfix
*barrier
)
11592 HOST_WIDE_INT offset
= 0;
11595 minipool_barrier
= barrier
;
11597 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11599 mp
->offset
= offset
;
11601 if (mp
->refcount
> 0)
11602 offset
+= mp
->fix_size
;
/* Emit the accumulated minipool after SCAN: an alignment directive, the
   pool label, one consttable_N insn per live entry, then a terminator
   and barrier; finally reset the pool to empty.
   NOTE(review): lossy extract -- declarations of `mp'/`nmp'/`align64',
   `case' labels, `break's, `#endif's, the obstack_free of dead nodes
   and most braces are missing from this copy.  */
11606 /* Output the literal table */
11608 dump_minipool (rtx scan
)
11614 if (ARM_DOUBLEWORD_ALIGN
)
11615 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11616 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
11623 fprintf (dump_file
,
11624 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11625 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
11627 scan
= emit_label_after (gen_label_rtx (), scan
);
11628 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
11629 scan
= emit_label_after (minipool_vector_label
, scan
);
11631 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
11633 if (mp
->refcount
> 0)
11637 fprintf (dump_file
,
11638 ";; Offset %u, min %ld, max %ld ",
11639 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
11640 (unsigned long) mp
->max_address
);
11641 arm_print_value (dump_file
, mp
->value
);
11642 fputc ('\n', dump_file
);
11645 switch (mp
->fix_size
)
11647 #ifdef HAVE_consttable_1
11649 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
11653 #ifdef HAVE_consttable_2
11655 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
11659 #ifdef HAVE_consttable_4
11661 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
11665 #ifdef HAVE_consttable_8
11667 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
11671 #ifdef HAVE_consttable_16
11673 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
11678 gcc_unreachable ();
11686 minipool_vector_head
= minipool_vector_tail
= NULL
;
11687 scan
= emit_insn_after (gen_consttable_end (), scan
);
11688 scan
= emit_barrier_after (scan
);
/* Heuristic cost (base 50) of forcing a barrier after INSN: cheaper
   just before a CODE_LABEL, dearer mid-stream.
   NOTE(review): lossy extract -- the return type, the `case' labels of
   the switch and one branch of the label adjustment are missing.  */
11691 /* Return the cost of forcibly inserting a barrier after INSN. */
11693 arm_barrier_cost (rtx insn
)
11695 /* Basing the location of the pool on the loop depth is preferable,
11696 but at the moment, the basic block information seems to be
11697 corrupt by this stage of the compilation. */
11698 int base_cost
= 50;
11699 rtx next
= next_nonnote_insn (insn
);
11701 if (next
!= NULL
&& GET_CODE (next
) == CODE_LABEL
)
11704 switch (GET_CODE (insn
))
11707 /* It will always be better to place the table before the label, rather
11716 return base_cost
- 10;
11719 return base_cost
+ 10;
/* NOTE(review): lossy extract -- the return type, declarations of
   `barrier', `new_fix', `tmp', `new_cost' and `selected_cost', several
   braces and the final `return new_fix;' are missing from this copy.
   Visible logic: walk forward from FIX->insn within max_count bytes,
   pick the cheapest point (arm_barrier_cost) to break the stream, emit
   a jump around a new BARRIER there, and splice a barrier fix entry
   into the fix list.  */
11723 /* Find the best place in the insn stream in the range
11724 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
11725 Create the barrier by inserting a jump and add a new fix entry for
11728 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
11730 HOST_WIDE_INT count
= 0;
11732 rtx from
= fix
->insn
;
11733 /* The instruction after which we will insert the jump. */
11734 rtx selected
= NULL
;
11736 /* The address at which the jump instruction will be placed. */
11737 HOST_WIDE_INT selected_address
;
11739 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
11740 rtx label
= gen_label_rtx ();
11742 selected_cost
= arm_barrier_cost (from
);
11743 selected_address
= fix
->address
;
11745 while (from
&& count
< max_count
)
11750 /* This code shouldn't have been called if there was a natural barrier
11752 gcc_assert (GET_CODE (from
) != BARRIER
);
11754 /* Count the length of this insn. */
11755 count
+= get_attr_length (from
);
11757 /* If there is a jump table, add its length. */
11758 tmp
= is_jump_table (from
);
11761 count
+= get_jump_table_size (tmp
);
11763 /* Jump tables aren't in a basic block, so base the cost on
11764 the dispatch insn. If we select this location, we will
11765 still put the pool after the table. */
11766 new_cost
= arm_barrier_cost (from
);
11768 if (count
< max_count
11769 && (!selected
|| new_cost
<= selected_cost
))
11772 selected_cost
= new_cost
;
11773 selected_address
= fix
->address
+ count
;
11776 /* Continue after the dispatch table. */
11777 from
= NEXT_INSN (tmp
);
11781 new_cost
= arm_barrier_cost (from
);
11783 if (count
< max_count
11784 && (!selected
|| new_cost
<= selected_cost
))
11787 selected_cost
= new_cost
;
11788 selected_address
= fix
->address
+ count
;
11791 from
= NEXT_INSN (from
);
11794 /* Make sure that we found a place to insert the jump. */
11795 gcc_assert (selected
);
11797 /* Create a new JUMP_INSN that branches around a barrier. */
11798 from
= emit_jump_insn_after (gen_jump (label
), selected
);
11799 JUMP_LABEL (from
) = label
;
11800 barrier
= emit_barrier_after (from
);
11801 emit_label_after (label
, barrier
);
11803 /* Create a minipool barrier entry for the new barrier. */
11804 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
11805 new_fix
->insn
= barrier
;
11806 new_fix
->address
= selected_address
;
11807 new_fix
->next
= fix
->next
;
11808 fix
->next
= new_fix
;
/* Allocate a barrier fix for INSN at ADDRESS and append it to the
   minipool fix list.  NOTE(review): lossy extract -- the return type,
   the fix->insn assignment, the else keyword and braces are missing
   from this copy.  */
11813 /* Record that there is a natural barrier in the insn stream at
11816 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
11818 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
11821 fix
->address
= address
;
11824 if (minipool_fix_head
!= NULL
)
11825 minipool_fix_tail
->next
= fix
;
11827 minipool_fix_head
= fix
;
11829 minipool_fix_tail
= fix
;
/* NOTE(review): lossy extract -- the return type, assignments of
   fix->insn/loc/mode, the minipool_pad update under
   ARM_DOUBLEWORD_ALIGN, the dump_file guard and several braces are
   missing from this copy.  Visible logic: build a fixup record for
   INSN (pool ranges taken from the insn attributes), optionally log
   it, and append it to the fix list.  */
11832 /* Record INSN, which will need fixing up to load a value from the
11833 minipool. ADDRESS is the offset of the insn since the start of the
11834 function; LOC is a pointer to the part of the insn which requires
11835 fixing; VALUE is the constant that must be loaded, which is of type
11838 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
11839 enum machine_mode mode
, rtx value
)
11841 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
11844 fix
->address
= address
;
11847 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
11848 fix
->value
= value
;
11849 fix
->forwards
= get_attr_pool_range (insn
);
11850 fix
->backwards
= get_attr_neg_pool_range (insn
);
11851 fix
->minipool
= NULL
;
11853 /* If an insn doesn't have a range defined for it, then it isn't
11854 expecting to be reworked by this code. Better to stop now than
11855 to generate duff assembly code. */
11856 gcc_assert (fix
->forwards
|| fix
->backwards
);
11858 /* If an entry requires 8-byte alignment then assume all constant pools
11859 require 4 bytes of padding. Trying to do this later on a per-pool
11860 basis is awkward because existing pool entries have to be modified. */
11861 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
11866 fprintf (dump_file
,
11867 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
11868 GET_MODE_NAME (mode
),
11869 INSN_UID (insn
), (unsigned long) address
,
11870 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
11871 arm_print_value (dump_file
, fix
->value
);
11872 fprintf (dump_file
, "\n");
11875 /* Add it to the chain of fixes. */
11878 if (minipool_fix_head
!= NULL
)
11879 minipool_fix_tail
->next
= fix
;
11881 minipool_fix_head
= fix
;
11883 minipool_fix_tail
= fix
;
/* Cost = insns to synthesize the low word + insns for the high word,
   each via arm_gen_constant.  NOTE(review): lossy extract -- the
   return type and the VOIDmode fallback (presumably `mode = DImode')
   are missing from this copy.  */
11886 /* Return the cost of synthesizing a 64-bit constant VAL inline.
11887 Returns the number of insns needed, or 99 if we don't know how to
11890 arm_const_double_inline_cost (rtx val
)
11892 rtx lowpart
, highpart
;
11893 enum machine_mode mode
;
11895 mode
= GET_MODE (val
);
11897 if (mode
== VOIDmode
)
11900 gcc_assert (GET_MODE_SIZE (mode
) == 8);
11902 lowpart
= gen_lowpart (SImode
, val
);
11903 highpart
= gen_highpart_mode (SImode
, mode
, val
);
11905 gcc_assert (GET_CODE (lowpart
) == CONST_INT
);
11906 gcc_assert (GET_CODE (highpart
) == CONST_INT
);
11908 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
11909 NULL_RTX
, NULL_RTX
, 0, 0)
11910 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
11911 NULL_RTX
, NULL_RTX
, 0, 0));
/* NOTE(review): lossy extract -- the return type, the declaration of
   `part', and the `return TRUE/FALSE' statements after each test are
   missing from this copy.  Visible logic: split is worthwhile when
   optimizing for size, with load scheduling, or when either 32-bit
   half (or its complement) is a valid ARM immediate.  */
11914 /* Return true if it is worthwhile to split a 64-bit constant into two
11915 32-bit operations. This is the case if optimizing for size, or
11916 if we have load delay slots, or if one 32-bit part can be done with
11917 a single data operation. */
11919 arm_const_double_by_parts (rtx val
)
11921 enum machine_mode mode
= GET_MODE (val
);
11924 if (optimize_size
|| arm_ld_sched
)
11927 if (mode
== VOIDmode
)
11930 part
= gen_highpart_mode (SImode
, mode
, val
);
11932 gcc_assert (GET_CODE (part
) == CONST_INT
);
11934 if (const_ok_for_arm (INTVAL (part
))
11935 || const_ok_for_arm (~INTVAL (part
)))
11938 part
= gen_lowpart (SImode
, val
);
11940 gcc_assert (GET_CODE (part
) == CONST_INT
);
11942 if (const_ok_for_arm (INTVAL (part
))
11943 || const_ok_for_arm (~INTVAL (part
)))
/* Both halves must be valid ARM immediates for a TRUE result.
   NOTE(review): lossy extract -- the return type, the declaration of
   `part' and the `return TRUE/FALSE' statements are missing from
   this copy.  */
11949 /* Return true if it is possible to inline both the high and low parts
11950 of a 64-bit constant into 32-bit data processing instructions. */
11952 arm_const_double_by_immediates (rtx val
)
11954 enum machine_mode mode
= GET_MODE (val
);
11957 if (mode
== VOIDmode
)
11960 part
= gen_highpart_mode (SImode
, mode
, val
);
11962 gcc_assert (GET_CODE (part
) == CONST_INT
);
11964 if (!const_ok_for_arm (INTVAL (part
)))
11967 part
= gen_lowpart (SImode
, val
);
11969 gcc_assert (GET_CODE (part
) == CONST_INT
);
11971 if (!const_ok_for_arm (INTVAL (part
)))
/* NOTE(review): lossy extract -- the return type, declaration of
   `opno', the `if (do_pushes)' guards in front of the
   push_minipool_fix calls, `result = true' updates, `continue's and
   the final `return result;' are missing from this copy.  Visible
   logic: extract the insn, and for each OP_IN operand accepted via a
   memory alternative, push a minipool fixup for constant operands or
   for constant-pool MEM references.  */
11977 /* Scan INSN and note any of its operands that need fixing.
11978 If DO_PUSHES is false we do not actually push any of the fixups
11979 needed. The function returns TRUE if any fixups were needed/pushed.
11980 This is used by arm_memory_load_p() which needs to know about loads
11981 of constants that will be converted into minipool loads. */
11983 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
11985 bool result
= false;
11988 extract_insn (insn
);
11990 if (!constrain_operands (1))
11991 fatal_insn_not_found (insn
);
11993 if (recog_data
.n_alternatives
== 0)
11996 /* Fill in recog_op_alt with information about the constraints of
11998 preprocess_constraints ();
12000 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12002 /* Things we need to fix can only occur in inputs. */
12003 if (recog_data
.operand_type
[opno
] != OP_IN
)
12006 /* If this alternative is a memory reference, then any mention
12007 of constants in this alternative is really to fool reload
12008 into allowing us to accept one there. We need to fix them up
12009 now so that we output the right code. */
12010 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
12012 rtx op
= recog_data
.operand
[opno
];
12014 if (CONSTANT_P (op
))
12017 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
12018 recog_data
.operand_mode
[opno
], op
);
12021 else if (GET_CODE (op
) == MEM
12022 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
12023 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
12027 rtx cop
= avoid_constant_pool_reference (op
);
12029 /* Casting the address of something to a mode narrower
12030 than a word can cause avoid_constant_pool_reference()
12031 to return the pool reference itself. That's no good to
12032 us here. Lets just hope that we can use the
12033 constant pool value directly. */
12035 cop
= get_pool_constant (XEXP (op
, 0));
12037 push_minipool_fix (insn
, address
,
12038 recog_data
.operand_loc
[opno
],
12039 recog_data
.operand_mode
[opno
], cop
);
/* NOTE(review): lossy extract -- the declarations of `bb', `insn' and
   `live' (regset), the FOR_EACH_BB loop header and several braces are
   missing from this copy.  Visible logic: simulate liveness backwards
   per basic block; where CC is dead, rewrite a 2-operand low-register
   SET whose operator has a 16-bit Thumb-2 flag-setting form into a
   PARALLEL with a CC clobber so the shorter encoding can be used.  */
12050 /* Convert instructions to their cc-clobbering variant if possible, since
12051 that allows us to use smaller encodings. */
12054 thumb2_reorg (void)
12059 INIT_REG_SET (&live
);
12061 /* We are freeing block_for_insn in the toplev to keep compatibility
12062 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12063 compute_bb_for_insn ();
12069 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
12070 df_simulate_initialize_backwards (bb
, &live
);
12071 FOR_BB_INSNS_REVERSE (bb
, insn
)
12073 if (NONJUMP_INSN_P (insn
)
12074 && !REGNO_REG_SET_P (&live
, CC_REGNUM
))
12076 rtx pat
= PATTERN (insn
);
12077 if (GET_CODE (pat
) == SET
12078 && low_register_operand (XEXP (pat
, 0), SImode
)
12079 && thumb_16bit_operator (XEXP (pat
, 1), SImode
)
12080 && low_register_operand (XEXP (XEXP (pat
, 1), 0), SImode
)
12081 && low_register_operand (XEXP (XEXP (pat
, 1), 1), SImode
))
12083 rtx dst
= XEXP (pat
, 0);
12084 rtx src
= XEXP (pat
, 1);
12085 rtx op0
= XEXP (src
, 0);
12086 if (rtx_equal_p (dst
, op0
)
12087 || GET_CODE (src
) == PLUS
|| GET_CODE (src
) == MINUS
)
12089 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12090 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
12091 rtvec vec
= gen_rtvec (2, pat
, clobber
);
12092 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
12093 INSN_CODE (insn
) = -1;
12097 if (NONDEBUG_INSN_P (insn
))
12098 df_simulate_one_insn_backwards (bb
, insn
, &live
);
12101 CLEAR_REG_SET (&live
);
/* Machine-dependent reorg pass: place constant (mini)pools within
   pc-relative addressing range.  NOTE(review): lossy extract -- the
   function signature (`arm_reorg'?), declarations of `insn', `table',
   `fix', `ftmp', `fdel', `this_fix', `addr', `barrier', the Thumb-2
   dispatch to thumb2_reorg, minipool_pad initialization, loop braces
   and several `break'/`continue' statements are missing from this
   copy.  Visible structure follows the algorithm described in the
   file's header comment: scan insns recording fixes and barriers,
   greedily group fixes per pool, force a barrier when none is in
   range, then rewrite each fixed insn to a pool load and dump the
   pool.  */
12104 /* Gcc puts the pool in the wrong place for ARM, since we can only
12105 load addresses a limited distance around the pc. We do some
12106 special munging to move the constant pool values to the correct
12107 point in the code. */
12112 HOST_WIDE_INT address
= 0;
12118 minipool_fix_head
= minipool_fix_tail
= NULL
;
12120 /* The first insn must always be a note, or the code below won't
12121 scan it properly. */
12122 insn
= get_insns ();
12123 gcc_assert (GET_CODE (insn
) == NOTE
);
12126 /* Scan all the insns and record the operands that will need fixing. */
12127 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
12129 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12130 && (arm_cirrus_insn_p (insn
)
12131 || GET_CODE (insn
) == JUMP_INSN
12132 || arm_memory_load_p (insn
)))
12133 cirrus_reorg (insn
);
12135 if (GET_CODE (insn
) == BARRIER
)
12136 push_minipool_barrier (insn
, address
);
12137 else if (INSN_P (insn
))
12141 note_invalid_constants (insn
, address
, true);
12142 address
+= get_attr_length (insn
);
12144 /* If the insn is a vector jump, add the size of the table
12145 and skip the table. */
12146 if ((table
= is_jump_table (insn
)) != NULL
)
12148 address
+= get_jump_table_size (table
);
12154 fix
= minipool_fix_head
;
12156 /* Now scan the fixups and perform the required changes. */
12161 Mfix
* last_added_fix
;
12162 Mfix
* last_barrier
= NULL
;
12165 /* Skip any further barriers before the next fix. */
12166 while (fix
&& GET_CODE (fix
->insn
) == BARRIER
)
12169 /* No more fixes. */
12173 last_added_fix
= NULL
;
12175 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
12177 if (GET_CODE (ftmp
->insn
) == BARRIER
)
12179 if (ftmp
->address
>= minipool_vector_head
->max_address
)
12182 last_barrier
= ftmp
;
12184 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
12187 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
12190 /* If we found a barrier, drop back to that; any fixes that we
12191 could have reached but come after the barrier will now go in
12192 the next mini-pool. */
12193 if (last_barrier
!= NULL
)
12195 /* Reduce the refcount for those fixes that won't go into this
12197 for (fdel
= last_barrier
->next
;
12198 fdel
&& fdel
!= ftmp
;
12201 fdel
->minipool
->refcount
--;
12202 fdel
->minipool
= NULL
;
12205 ftmp
= last_barrier
;
12209 /* ftmp is first fix that we can't fit into this pool and
12210 there no natural barriers that we could use. Insert a
12211 new barrier in the code somewhere between the previous
12212 fix and this one, and arrange to jump around it. */
12213 HOST_WIDE_INT max_address
;
12215 /* The last item on the list of fixes must be a barrier, so
12216 we can never run off the end of the list of fixes without
12217 last_barrier being set. */
12220 max_address
= minipool_vector_head
->max_address
;
12221 /* Check that there isn't another fix that is in range that
12222 we couldn't fit into this pool because the pool was
12223 already too large: we need to put the pool before such an
12224 instruction. The pool itself may come just after the
12225 fix because create_fix_barrier also allows space for a
12226 jump instruction. */
12227 if (ftmp
->address
< max_address
)
12228 max_address
= ftmp
->address
+ 1;
12230 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
12233 assign_minipool_offsets (last_barrier
);
12237 if (GET_CODE (ftmp
->insn
) != BARRIER
12238 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
12245 /* Scan over the fixes we have identified for this pool, fixing them
12246 up and adding the constants to the pool itself. */
12247 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
12248 this_fix
= this_fix
->next
)
12249 if (GET_CODE (this_fix
->insn
) != BARRIER
)
12252 = plus_constant (gen_rtx_LABEL_REF (VOIDmode
,
12253 minipool_vector_label
),
12254 this_fix
->minipool
->offset
);
12255 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
12258 dump_minipool (last_barrier
->insn
);
12262 /* From now on we must synthesize any constants that we can't handle
12263 directly. This can happen if the RTL gets split during final
12264 instruction generation. */
12265 after_arm_reorg
= 1;
12267 /* Free the minipool memory. */
12268 obstack_free (&minipool_obstack
, minipool_startobj
);
/* NOTE(review): lossy extract -- the return type, declarations of
   `r' and `i', the init_fp_table() call under !fp_consts_inited,
   and the function braces are missing from this copy.  Looks up X
   among the 8 well-known FP constants and returns its string form.  */
12271 /* Routines to output assembly language. */
12273 /* If the rtx is the correct value then return the string of the number.
12274 In this way we can ensure that valid double constants are generated even
12275 when cross compiling. */
12277 fp_immediate_constant (rtx x
)
12282 if (!fp_consts_inited
)
12285 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12286 for (i
= 0; i
< 8; i
++)
12287 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
12288 return strings_fp
[i
];
12290 gcc_unreachable ();
/* NOTE(review): lossy extract -- the declaration of `i', the lazy
   fp-table initialization call and the function braces are missing
   from this copy.  */
12293 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12294 static const char *
12295 fp_const_from_val (REAL_VALUE_TYPE
*r
)
12299 if (!fp_consts_inited
)
12302 for (i
= 0; i
< 8; i
++)
12303 if (REAL_VALUES_EQUAL (*r
, values_fp
[i
]))
12304 return strings_fp
[i
];
12306 gcc_unreachable ();
/* NOTE(review): lossy extract -- the return type, declaration of `i',
   the not_first bookkeeping inside the loop, the rfe test before the
   "}^" form, and several braces are missing from this copy.  Emits
   "<instr> <reg>, {r0, r1, ...}" (with "^" when RFE).  */
12309 /* Output the operands of a LDM/STM instruction to STREAM.
12310 MASK is the ARM register set mask of which only bits 0-15 are important.
12311 REG is the base register, either the frame pointer or the stack pointer,
12312 INSTR is the possibly suffixed load or store instruction.
12313 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12316 print_multi_reg (FILE *stream
, const char *instr
, unsigned reg
,
12317 unsigned long mask
, int rfe
)
12320 bool not_first
= FALSE
;
12322 gcc_assert (!rfe
|| (mask
& (1 << PC_REGNUM
)));
12323 fputc ('\t', stream
);
12324 asm_fprintf (stream
, instr
, reg
);
12325 fputc ('{', stream
);
12327 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
12328 if (mask
& (1 << i
))
12331 fprintf (stream
, ", ");
12333 asm_fprintf (stream
, "%r", i
);
12338 fprintf (stream
, "}^\n");
12340 fprintf (stream
, "}\n");
/* NOTE(review): lossy extract -- the return type, the declaration of
   `i', the ARM10 workaround body (widening the range by one register),
   the `count > 16' test guarding the recursive split, and the comma
   bookkeeping in the register loop are missing from this copy.  */
12344 /* Output a FLDMD instruction to STREAM.
12345 BASE if the register containing the address.
12346 REG and COUNT specify the register range.
12347 Extra registers may be added to avoid hardware bugs.
12349 We output FLDMD even for ARMv5 VFP implementations. Although
12350 FLDMD is technically not supported until ARMv6, it is believed
12351 that all VFP implementations support its use in this context. */
12354 vfp_output_fldmd (FILE * stream
, unsigned int base
, int reg
, int count
)
12358 /* Workaround ARM10 VFPr1 bug. */
12359 if (count
== 2 && !arm_arch6
)
12366 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12367 load into multiple parts if we have to handle more than 16 registers. */
12370 vfp_output_fldmd (stream
, base
, reg
, 16);
12371 vfp_output_fldmd (stream
, base
, reg
+ 16, count
- 16);
12375 fputc ('\t', stream
);
12376 asm_fprintf (stream
, "fldmfdd\t%r!, {", base
);
12378 for (i
= reg
; i
< reg
+ count
; i
++)
12381 fputs (", ", stream
);
12382 asm_fprintf (stream
, "d%d", i
);
12384 fputs ("}\n", stream
);
12389 /* Output the assembly for a store multiple. */
12392 vfp_output_fstmd (rtx
* operands
)
12399 strcpy (pattern
, "fstmfdd\t%m0!, {%P1");
12400 p
= strlen (pattern
);
12402 gcc_assert (GET_CODE (operands
[1]) == REG
);
12404 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
12405 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
12407 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
12409 strcpy (&pattern
[p
], "}");
12411 output_asm_insn (pattern
, operands
);
12416 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12417 number of bytes pushed. */
12420 vfp_emit_fstmd (int base_reg
, int count
)
12427 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12428 register pairs are stored by a store multiple insn. We avoid this
12429 by pushing an extra pair. */
12430 if (count
== 2 && !arm_arch6
)
12432 if (base_reg
== LAST_VFP_REGNUM
- 3)
12437 /* FSTMD may not store more than 16 doubleword registers at once. Split
12438 larger stores into multiple parts (up to a maximum of two, in
12443 /* NOTE: base_reg is an internal register number, so each D register
12445 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
12446 saved
+= vfp_emit_fstmd (base_reg
, 16);
12450 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
12451 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
12453 reg
= gen_rtx_REG (DFmode
, base_reg
);
12456 XVECEXP (par
, 0, 0)
12457 = gen_rtx_SET (VOIDmode
,
12460 gen_rtx_PRE_MODIFY (Pmode
,
12463 (stack_pointer_rtx
,
12466 gen_rtx_UNSPEC (BLKmode
,
12467 gen_rtvec (1, reg
),
12468 UNSPEC_PUSH_MULT
));
12470 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12471 plus_constant (stack_pointer_rtx
, -(count
* 8)));
12472 RTX_FRAME_RELATED_P (tmp
) = 1;
12473 XVECEXP (dwarf
, 0, 0) = tmp
;
12475 tmp
= gen_rtx_SET (VOIDmode
,
12476 gen_frame_mem (DFmode
, stack_pointer_rtx
),
12478 RTX_FRAME_RELATED_P (tmp
) = 1;
12479 XVECEXP (dwarf
, 0, 1) = tmp
;
12481 for (i
= 1; i
< count
; i
++)
12483 reg
= gen_rtx_REG (DFmode
, base_reg
);
12485 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
12487 tmp
= gen_rtx_SET (VOIDmode
,
12488 gen_frame_mem (DFmode
,
12489 plus_constant (stack_pointer_rtx
,
12492 RTX_FRAME_RELATED_P (tmp
) = 1;
12493 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
12496 par
= emit_insn (par
);
12497 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
12498 RTX_FRAME_RELATED_P (par
) = 1;
12503 /* Emit a call instruction with pattern PAT. ADDR is the address of
12504 the call target. */
12507 arm_emit_call_insn (rtx pat
, rtx addr
)
12511 insn
= emit_call_insn (pat
);
12513 /* The PIC register is live on entry to VxWorks PIC PLT entries.
12514 If the call might use such an entry, add a use of the PIC register
12515 to the instruction's CALL_INSN_FUNCTION_USAGE. */
12516 if (TARGET_VXWORKS_RTP
12518 && GET_CODE (addr
) == SYMBOL_REF
12519 && (SYMBOL_REF_DECL (addr
)
12520 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
12521 : !SYMBOL_REF_LOCAL_P (addr
)))
12523 require_pic_register ();
12524 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
12528 /* Output a 'call' insn. */
12530 output_call (rtx
*operands
)
12532 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
12534 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
12535 if (REGNO (operands
[0]) == LR_REGNUM
)
12537 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
12538 output_asm_insn ("mov%?\t%0, %|lr", operands
);
12541 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12543 if (TARGET_INTERWORK
|| arm_arch4t
)
12544 output_asm_insn ("bx%?\t%0", operands
);
12546 output_asm_insn ("mov%?\t%|pc, %0", operands
);
12551 /* Output a 'call' insn that is a reference in memory. This is
12552 disabled for ARMv5 and we prefer a blx instead because otherwise
12553 there's a significant performance overhead. */
12555 output_call_mem (rtx
*operands
)
12557 gcc_assert (!arm_arch5
);
12558 if (TARGET_INTERWORK
)
12560 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
12561 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12562 output_asm_insn ("bx%?\t%|ip", operands
);
12564 else if (regno_use_in (LR_REGNUM
, operands
[0]))
12566 /* LR is used in the memory address. We load the address in the
12567 first instruction. It's safe to use IP as the target of the
12568 load since the call will kill it anyway. */
12569 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
12570 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12572 output_asm_insn ("bx%?\t%|ip", operands
);
12574 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
12578 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
12579 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
12586 /* Output a move from arm registers to an fpa registers.
12587 OPERANDS[0] is an fpa register.
12588 OPERANDS[1] is the first registers of an arm register pair. */
12590 output_mov_long_double_fpa_from_arm (rtx
*operands
)
12592 int arm_reg0
= REGNO (operands
[1]);
12595 gcc_assert (arm_reg0
!= IP_REGNUM
);
12597 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12598 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12599 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
12601 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
12602 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands
);
12607 /* Output a move from an fpa register to arm registers.
12608 OPERANDS[0] is the first registers of an arm register pair.
12609 OPERANDS[1] is an fpa register. */
12611 output_mov_long_double_arm_from_fpa (rtx
*operands
)
12613 int arm_reg0
= REGNO (operands
[0]);
12616 gcc_assert (arm_reg0
!= IP_REGNUM
);
12618 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12619 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12620 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
12622 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands
);
12623 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
12627 /* Output a move from arm registers to arm registers of a long double
12628 OPERANDS[0] is the destination.
12629 OPERANDS[1] is the source. */
12631 output_mov_long_double_arm_from_arm (rtx
*operands
)
12633 /* We have to be careful here because the two might overlap. */
12634 int dest_start
= REGNO (operands
[0]);
12635 int src_start
= REGNO (operands
[1]);
12639 if (dest_start
< src_start
)
12641 for (i
= 0; i
< 3; i
++)
12643 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
12644 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
12645 output_asm_insn ("mov%?\t%0, %1", ops
);
12650 for (i
= 2; i
>= 0; i
--)
12652 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
12653 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
12654 output_asm_insn ("mov%?\t%0, %1", ops
);
12662 arm_emit_movpair (rtx dest
, rtx src
)
12664 /* If the src is an immediate, simplify it. */
12665 if (CONST_INT_P (src
))
12667 HOST_WIDE_INT val
= INTVAL (src
);
12668 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
12669 if ((val
>> 16) & 0x0000ffff)
12670 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
12672 GEN_INT ((val
>> 16) & 0x0000ffff));
12675 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
12676 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
12679 /* Output a move from arm registers to an fpa registers.
12680 OPERANDS[0] is an fpa register.
12681 OPERANDS[1] is the first registers of an arm register pair. */
12683 output_mov_double_fpa_from_arm (rtx
*operands
)
12685 int arm_reg0
= REGNO (operands
[1]);
12688 gcc_assert (arm_reg0
!= IP_REGNUM
);
12690 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12691 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12692 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops
);
12693 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands
);
12697 /* Output a move from an fpa register to arm registers.
12698 OPERANDS[0] is the first registers of an arm register pair.
12699 OPERANDS[1] is an fpa register. */
12701 output_mov_double_arm_from_fpa (rtx
*operands
)
12703 int arm_reg0
= REGNO (operands
[0]);
12706 gcc_assert (arm_reg0
!= IP_REGNUM
);
12708 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
12709 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
12710 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands
);
12711 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops
);
12715 /* Output a move between double words.
12716 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
12717 or MEM<-REG and all MEMs must be offsettable addresses. */
12719 output_move_double (rtx
*operands
)
12721 enum rtx_code code0
= GET_CODE (operands
[0]);
12722 enum rtx_code code1
= GET_CODE (operands
[1]);
12727 unsigned int reg0
= REGNO (operands
[0]);
12729 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
12731 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
12733 switch (GET_CODE (XEXP (operands
[1], 0)))
12737 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
12738 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
12740 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
12744 gcc_assert (TARGET_LDRD
);
12745 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
12750 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
12752 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
12757 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
12759 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
12763 gcc_assert (TARGET_LDRD
);
12764 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
12769 /* Autoicrement addressing modes should never have overlapping
12770 base and destination registers, and overlapping index registers
12771 are already prohibited, so this doesn't need to worry about
12773 otherops
[0] = operands
[0];
12774 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
12775 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
12777 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
12779 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
12781 /* Registers overlap so split out the increment. */
12782 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
12783 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
12787 /* Use a single insn if we can.
12788 FIXME: IWMMXT allows offsets larger than ldrd can
12789 handle, fix these up with a pair of ldr. */
12791 || GET_CODE (otherops
[2]) != CONST_INT
12792 || (INTVAL (otherops
[2]) > -256
12793 && INTVAL (otherops
[2]) < 256))
12794 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
12797 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
12798 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12804 /* Use a single insn if we can.
12805 FIXME: IWMMXT allows offsets larger than ldrd can handle,
12806 fix these up with a pair of ldr. */
12808 || GET_CODE (otherops
[2]) != CONST_INT
12809 || (INTVAL (otherops
[2]) > -256
12810 && INTVAL (otherops
[2]) < 256))
12811 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
12814 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12815 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
12822 /* We might be able to use ldrd %0, %1 here. However the range is
12823 different to ldr/adr, and it is broken on some ARMv7-M
12824 implementations. */
12825 /* Use the second register of the pair to avoid problematic
12827 otherops
[1] = operands
[1];
12828 output_asm_insn ("adr%?\t%0, %1", otherops
);
12829 operands
[1] = otherops
[0];
12831 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
12833 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
12836 /* ??? This needs checking for thumb2. */
12838 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
12839 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
12841 otherops
[0] = operands
[0];
12842 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
12843 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
12845 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
12847 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
12849 switch ((int) INTVAL (otherops
[2]))
12852 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
12857 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
12862 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
12866 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
12867 operands
[1] = otherops
[0];
12869 && (GET_CODE (otherops
[2]) == REG
12871 || (GET_CODE (otherops
[2]) == CONST_INT
12872 && INTVAL (otherops
[2]) > -256
12873 && INTVAL (otherops
[2]) < 256)))
12875 if (reg_overlap_mentioned_p (operands
[0],
12879 /* Swap base and index registers over to
12880 avoid a conflict. */
12882 otherops
[1] = otherops
[2];
12885 /* If both registers conflict, it will usually
12886 have been fixed by a splitter. */
12887 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
12888 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
12890 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
12891 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
12895 otherops
[0] = operands
[0];
12896 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
12901 if (GET_CODE (otherops
[2]) == CONST_INT
)
12903 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
12904 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
12906 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
12909 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
12912 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
12915 return "ldr%(d%)\t%0, [%1]";
12917 return "ldm%(ia%)\t%1, %M0";
12921 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
12922 /* Take care of overlapping base/data reg. */
12923 if (reg_mentioned_p (operands
[0], operands
[1]))
12925 output_asm_insn ("ldr%?\t%0, %1", otherops
);
12926 output_asm_insn ("ldr%?\t%0, %1", operands
);
12930 output_asm_insn ("ldr%?\t%0, %1", operands
);
12931 output_asm_insn ("ldr%?\t%0, %1", otherops
);
12938 /* Constraints should ensure this. */
12939 gcc_assert (code0
== MEM
&& code1
== REG
);
12940 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
12942 switch (GET_CODE (XEXP (operands
[0], 0)))
12946 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
12948 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
12952 gcc_assert (TARGET_LDRD
);
12953 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
12958 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
12960 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
12965 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
12967 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
12971 gcc_assert (TARGET_LDRD
);
12972 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
12977 otherops
[0] = operands
[1];
12978 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
12979 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
12981 /* IWMMXT allows offsets larger than ldrd can handle,
12982 fix these up with a pair of ldr. */
12984 && GET_CODE (otherops
[2]) == CONST_INT
12985 && (INTVAL(otherops
[2]) <= -256
12986 || INTVAL(otherops
[2]) >= 256))
12988 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
12990 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
12991 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
12995 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
12996 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
12999 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13000 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
13002 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
13006 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
13007 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13009 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
13012 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
13018 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
13024 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
13029 && (GET_CODE (otherops
[2]) == REG
13031 || (GET_CODE (otherops
[2]) == CONST_INT
13032 && INTVAL (otherops
[2]) > -256
13033 && INTVAL (otherops
[2]) < 256)))
13035 otherops
[0] = operands
[1];
13036 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
13037 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
13043 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
13044 otherops
[1] = operands
[1];
13045 output_asm_insn ("str%?\t%1, %0", operands
);
13046 output_asm_insn ("str%?\t%H1, %0", otherops
);
13053 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13054 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13057 output_move_quad (rtx
*operands
)
13059 if (REG_P (operands
[0]))
13061 /* Load, or reg->reg move. */
13063 if (MEM_P (operands
[1]))
13065 switch (GET_CODE (XEXP (operands
[1], 0)))
13068 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
13073 output_asm_insn ("adr%?\t%0, %1", operands
);
13074 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
13078 gcc_unreachable ();
13086 gcc_assert (REG_P (operands
[1]));
13088 dest
= REGNO (operands
[0]);
13089 src
= REGNO (operands
[1]);
13091 /* This seems pretty dumb, but hopefully GCC won't try to do it
13094 for (i
= 0; i
< 4; i
++)
13096 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13097 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13098 output_asm_insn ("mov%?\t%0, %1", ops
);
13101 for (i
= 3; i
>= 0; i
--)
13103 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13104 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13105 output_asm_insn ("mov%?\t%0, %1", ops
);
13111 gcc_assert (MEM_P (operands
[0]));
13112 gcc_assert (REG_P (operands
[1]));
13113 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
13115 switch (GET_CODE (XEXP (operands
[0], 0)))
13118 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13122 gcc_unreachable ();
13129 /* Output a VFP load or store instruction. */
13132 output_move_vfp (rtx
*operands
)
13134 rtx reg
, mem
, addr
, ops
[2];
13135 int load
= REG_P (operands
[0]);
13136 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
13137 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
13140 enum machine_mode mode
;
13142 reg
= operands
[!load
];
13143 mem
= operands
[load
];
13145 mode
= GET_MODE (reg
);
13147 gcc_assert (REG_P (reg
));
13148 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
13149 gcc_assert (mode
== SFmode
13153 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
13154 gcc_assert (MEM_P (mem
));
13156 addr
= XEXP (mem
, 0);
13158 switch (GET_CODE (addr
))
13161 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13162 ops
[0] = XEXP (addr
, 0);
13167 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
13168 ops
[0] = XEXP (addr
, 0);
13173 templ
= "f%s%c%%?\t%%%s0, %%1%s";
13179 sprintf (buff
, templ
,
13180 load
? "ld" : "st",
13183 integer_p
? "\t%@ int" : "");
13184 output_asm_insn (buff
, ops
);
13189 /* Output a Neon quad-word load or store, or a load or store for
13190 larger structure modes.
13192 WARNING: The ordering of elements is weird in big-endian mode,
13193 because we use VSTM, as required by the EABI. GCC RTL defines
13194 element ordering based on in-memory order. This can be differ
13195 from the architectural ordering of elements within a NEON register.
13196 The intrinsics defined in arm_neon.h use the NEON register element
13197 ordering, not the GCC RTL element ordering.
13199 For example, the in-memory ordering of a big-endian a quadword
13200 vector with 16-bit elements when stored from register pair {d0,d1}
13201 will be (lowest address first, d0[N] is NEON register element N):
13203 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13205 When necessary, quadword registers (dN, dN+1) are moved to ARM
13206 registers from rN in the order:
13208 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13210 So that STM/LDM can be used on vectors in ARM registers, and the
13211 same memory layout will result as if VSTM/VLDM were used. */
13214 output_move_neon (rtx
*operands
)
13216 rtx reg
, mem
, addr
, ops
[2];
13217 int regno
, load
= REG_P (operands
[0]);
13220 enum machine_mode mode
;
13222 reg
= operands
[!load
];
13223 mem
= operands
[load
];
13225 mode
= GET_MODE (reg
);
13227 gcc_assert (REG_P (reg
));
13228 regno
= REGNO (reg
);
13229 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
13230 || NEON_REGNO_OK_FOR_QUAD (regno
));
13231 gcc_assert (VALID_NEON_DREG_MODE (mode
)
13232 || VALID_NEON_QREG_MODE (mode
)
13233 || VALID_NEON_STRUCT_MODE (mode
));
13234 gcc_assert (MEM_P (mem
));
13236 addr
= XEXP (mem
, 0);
13238 /* Strip off const from addresses like (const (plus (...))). */
13239 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13240 addr
= XEXP (addr
, 0);
13242 switch (GET_CODE (addr
))
13245 templ
= "v%smia%%?\t%%0!, %%h1";
13246 ops
[0] = XEXP (addr
, 0);
13251 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13252 templ
= "v%smdb%%?\t%%0!, %%h1";
13253 ops
[0] = XEXP (addr
, 0);
13258 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13259 gcc_unreachable ();
13264 int nregs
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13267 for (i
= 0; i
< nregs
; i
++)
13269 /* We're only using DImode here because it's a convenient size. */
13270 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
13271 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
13272 if (reg_overlap_mentioned_p (ops
[0], mem
))
13274 gcc_assert (overlap
== -1);
13279 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13280 output_asm_insn (buff
, ops
);
13285 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
13286 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
13287 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13288 output_asm_insn (buff
, ops
);
13295 templ
= "v%smia%%?\t%%m0, %%h1";
13300 sprintf (buff
, templ
, load
? "ld" : "st");
13301 output_asm_insn (buff
, ops
);
13306 /* Compute and return the length of neon_mov<mode>, where <mode> is
13307 one of VSTRUCT modes: EI, OI, CI or XI. */
13309 arm_attr_length_move_neon (rtx insn
)
13311 rtx reg
, mem
, addr
;
13313 enum machine_mode mode
;
13315 extract_insn_cached (insn
);
13317 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
13319 mode
= GET_MODE (recog_data
.operand
[0]);
13330 gcc_unreachable ();
13334 load
= REG_P (recog_data
.operand
[0]);
13335 reg
= recog_data
.operand
[!load
];
13336 mem
= recog_data
.operand
[load
];
13338 gcc_assert (MEM_P (mem
));
13340 mode
= GET_MODE (reg
);
13341 addr
= XEXP (mem
, 0);
13343 /* Strip off const from addresses like (const (plus (...))). */
13344 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13345 addr
= XEXP (addr
, 0);
13347 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
13349 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13356 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13360 arm_address_offset_is_imm (rtx insn
)
13364 extract_insn_cached (insn
);
13366 if (REG_P (recog_data
.operand
[0]))
13369 mem
= recog_data
.operand
[0];
13371 gcc_assert (MEM_P (mem
));
13373 addr
= XEXP (mem
, 0);
13375 if (GET_CODE (addr
) == REG
13376 || (GET_CODE (addr
) == PLUS
13377 && GET_CODE (XEXP (addr
, 0)) == REG
13378 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
))
13384 /* Output an ADD r, s, #n where n may be too big for one instruction.
13385 If adding zero to one register, output nothing. */
13387 output_add_immediate (rtx
*operands
)
13389 HOST_WIDE_INT n
= INTVAL (operands
[2]);
13391 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
13394 output_multi_immediate (operands
,
13395 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13398 output_multi_immediate (operands
,
13399 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13406 /* Output a multiple immediate operation.
13407 OPERANDS is the vector of operands referred to in the output patterns.
13408 INSTR1 is the output pattern to use for the first constant.
13409 INSTR2 is the output pattern to use for subsequent constants.
13410 IMMED_OP is the index of the constant slot in OPERANDS.
13411 N is the constant value. */
13412 static const char *
13413 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
13414 int immed_op
, HOST_WIDE_INT n
)
13416 #if HOST_BITS_PER_WIDE_INT > 32
13422 /* Quick and easy output. */
13423 operands
[immed_op
] = const0_rtx
;
13424 output_asm_insn (instr1
, operands
);
13429 const char * instr
= instr1
;
13431 /* Note that n is never zero here (which would give no output). */
13432 for (i
= 0; i
< 32; i
+= 2)
13436 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
13437 output_asm_insn (instr
, operands
);
13447 /* Return the name of a shifter operation. */
13448 static const char *
13449 arm_shift_nmem(enum rtx_code code
)
13454 return ARM_LSL_NAME
;
13470 /* Return the appropriate ARM instruction for the operation code.
13471 The returned result should not be overwritten. OP is the rtx of the
13472 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
13475 arithmetic_instr (rtx op
, int shift_first_arg
)
13477 switch (GET_CODE (op
))
13483 return shift_first_arg
? "rsb" : "sub";
13498 return arm_shift_nmem(GET_CODE(op
));
13501 gcc_unreachable ();
13505 /* Ensure valid constant shifts and return the appropriate shift mnemonic
13506 for the operation code. The returned result should not be overwritten.
13507 OP is the rtx code of the shift.
13508 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
13510 static const char *
13511 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
13514 enum rtx_code code
= GET_CODE (op
);
13516 switch (GET_CODE (XEXP (op
, 1)))
13524 *amountp
= INTVAL (XEXP (op
, 1));
13528 gcc_unreachable ();
13534 gcc_assert (*amountp
!= -1);
13535 *amountp
= 32 - *amountp
;
13538 /* Fall through. */
13544 mnem
= arm_shift_nmem(code
);
13548 /* We never have to worry about the amount being other than a
13549 power of 2, since this case can never be reloaded from a reg. */
13550 gcc_assert (*amountp
!= -1);
13551 *amountp
= int_log2 (*amountp
);
13552 return ARM_LSL_NAME
;
13555 gcc_unreachable ();
13558 if (*amountp
!= -1)
13560 /* This is not 100% correct, but follows from the desire to merge
13561 multiplication by a power of 2 with the recognizer for a
13562 shift. >=32 is not a valid shift for "lsl", so we must try and
13563 output a shift that produces the correct arithmetical result.
13564 Using lsr #32 is identical except for the fact that the carry bit
13565 is not set correctly if we set the flags; but we never use the
13566 carry bit from such an operation, so we can ignore that. */
13567 if (code
== ROTATERT
)
13568 /* Rotate is just modulo 32. */
13570 else if (*amountp
!= (*amountp
& 31))
13572 if (code
== ASHIFT
)
13577 /* Shifts of 0 are no-ops. */
13585 /* Obtain the shift from the POWER of two. */
13587 static HOST_WIDE_INT
13588 int_log2 (HOST_WIDE_INT power
)
13590 HOST_WIDE_INT shift
= 0;
13592 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
13594 gcc_assert (shift
<= 31);
13601 /* Output a .ascii pseudo-op, keeping track of lengths. This is
13602 because /bin/as is horribly restrictive. The judgement about
13603 whether or not each character is 'printable' (and can be output as
13604 is) or not (and must be printed with an octal escape) must be made
13605 with reference to the *host* character set -- the situation is
13606 similar to that discussed in the comments above pp_c_char in
13607 c-pretty-print.c. */
13609 #define MAX_ASCII_LEN 51
13612 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
13615 int len_so_far
= 0;
13617 fputs ("\t.ascii\t\"", stream
);
13619 for (i
= 0; i
< len
; i
++)
13623 if (len_so_far
>= MAX_ASCII_LEN
)
13625 fputs ("\"\n\t.ascii\t\"", stream
);
13631 if (c
== '\\' || c
== '\"')
13633 putc ('\\', stream
);
13641 fprintf (stream
, "\\%03o", c
);
13646 fputs ("\"\n", stream
);
13649 /* Compute the register save mask for registers 0 through 12
13650 inclusive. This code is used by arm_compute_save_reg_mask. */
13652 static unsigned long
13653 arm_compute_save_reg0_reg12_mask (void)
13655 unsigned long func_type
= arm_current_func_type ();
13656 unsigned long save_reg_mask
= 0;
13659 if (IS_INTERRUPT (func_type
))
13661 unsigned int max_reg
;
13662 /* Interrupt functions must not corrupt any registers,
13663 even call clobbered ones. If this is a leaf function
13664 we can just examine the registers used by the RTL, but
13665 otherwise we have to assume that whatever function is
13666 called might clobber anything, and so we have to save
13667 all the call-clobbered registers as well. */
13668 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
13669 /* FIQ handlers have registers r8 - r12 banked, so
13670 we only need to check r0 - r7, Normal ISRs only
13671 bank r14 and r15, so we must check up to r12.
13672 r13 is the stack pointer which is always preserved,
13673 so we do not need to consider it here. */
13678 for (reg
= 0; reg
<= max_reg
; reg
++)
13679 if (df_regs_ever_live_p (reg
)
13680 || (! current_function_is_leaf
&& call_used_regs
[reg
]))
13681 save_reg_mask
|= (1 << reg
);
13683 /* Also save the pic base register if necessary. */
13685 && !TARGET_SINGLE_PIC_BASE
13686 && arm_pic_register
!= INVALID_REGNUM
13687 && crtl
->uses_pic_offset_table
)
13688 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13690 else if (IS_VOLATILE(func_type
))
13692 /* For noreturn functions we historically omitted register saves
13693 altogether. However this really messes up debugging. As a
13694 compromise save just the frame pointers. Combined with the link
13695 register saved elsewhere this should be sufficient to get
13697 if (frame_pointer_needed
)
13698 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
13699 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
13700 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
13701 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
13702 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
13706 /* In the normal case we only need to save those registers
13707 which are call saved and which are used by this function. */
13708 for (reg
= 0; reg
<= 11; reg
++)
13709 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
13710 save_reg_mask
|= (1 << reg
);
13712 /* Handle the frame pointer as a special case. */
13713 if (frame_pointer_needed
)
13714 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
13716 /* If we aren't loading the PIC register,
13717 don't stack it even though it may be live. */
13719 && !TARGET_SINGLE_PIC_BASE
13720 && arm_pic_register
!= INVALID_REGNUM
13721 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
13722 || crtl
->uses_pic_offset_table
))
13723 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13725 /* The prologue will copy SP into R0, so save it. */
13726 if (IS_STACKALIGN (func_type
))
13727 save_reg_mask
|= 1;
13730 /* Save registers so the exception handler can modify them. */
13731 if (crtl
->calls_eh_return
)
13737 reg
= EH_RETURN_DATA_REGNO (i
);
13738 if (reg
== INVALID_REGNUM
)
13740 save_reg_mask
|= 1 << reg
;
13744 return save_reg_mask
;
13748 /* Compute the number of bytes used to store the static chain register on the
13749 stack, above the stack frame. We need to know this accurately to get the
13750 alignment of the rest of the stack frame correct. */
13752 static int arm_compute_static_chain_stack_bytes (void)
13754 unsigned long func_type
= arm_current_func_type ();
13755 int static_chain_stack_bytes
= 0;
13757 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
&&
13758 IS_NESTED (func_type
) &&
13759 df_regs_ever_live_p (3) && crtl
->args
.pretend_args_size
== 0)
13760 static_chain_stack_bytes
= 4;
13762 return static_chain_stack_bytes
;
13766 /* Compute a bit mask of which registers need to be
13767 saved on the stack for the current function.
13768 This is used by arm_get_frame_offsets, which may add extra registers. */
13770 static unsigned long
13771 arm_compute_save_reg_mask (void)
13773 unsigned int save_reg_mask
= 0;
13774 unsigned long func_type
= arm_current_func_type ();
13777 if (IS_NAKED (func_type
))
13778 /* This should never really happen. */
13781 /* If we are creating a stack frame, then we must save the frame pointer,
13782 IP (which will hold the old stack pointer), LR and the PC. */
13783 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
13785 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
13788 | (1 << PC_REGNUM
);
13790 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
13792 /* Decide if we need to save the link register.
13793 Interrupt routines have their own banked link register,
13794 so they never need to save it.
13795 Otherwise if we do not use the link register we do not need to save
13796 it. If we are pushing other registers onto the stack however, we
13797 can save an instruction in the epilogue by pushing the link register
13798 now and then popping it back into the PC. This incurs extra memory
13799 accesses though, so we only do it when optimizing for size, and only
13800 if we know that we will not need a fancy return sequence. */
13801 if (df_regs_ever_live_p (LR_REGNUM
)
13804 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
13805 && !crtl
->calls_eh_return
))
13806 save_reg_mask
|= 1 << LR_REGNUM
;
13808 if (cfun
->machine
->lr_save_eliminated
)
13809 save_reg_mask
&= ~ (1 << LR_REGNUM
);
13811 if (TARGET_REALLY_IWMMXT
13812 && ((bit_count (save_reg_mask
)
13813 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
13814 arm_compute_static_chain_stack_bytes())
13817 /* The total number of registers that are going to be pushed
13818 onto the stack is odd. We need to ensure that the stack
13819 is 64-bit aligned before we start to save iWMMXt registers,
13820 and also before we start to create locals. (A local variable
13821 might be a double or long long which we will load/store using
13822 an iWMMXt instruction). Therefore we need to push another
13823 ARM register, so that the stack will be 64-bit aligned. We
13824 try to avoid using the arg registers (r0 -r3) as they might be
13825 used to pass values in a tail call. */
13826 for (reg
= 4; reg
<= 12; reg
++)
13827 if ((save_reg_mask
& (1 << reg
)) == 0)
13831 save_reg_mask
|= (1 << reg
);
13834 cfun
->machine
->sibcall_blocked
= 1;
13835 save_reg_mask
|= (1 << 3);
13839 /* We may need to push an additional register for use initializing the
13840 PIC base register. */
13841 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
13842 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
13844 reg
= thumb_find_work_register (1 << 4);
13845 if (!call_used_regs
[reg
])
13846 save_reg_mask
|= (1 << reg
);
13849 return save_reg_mask
;
13853 /* Compute a bit mask of which registers need to be
13854 saved on the stack for the current function. */
13855 static unsigned long
13856 thumb1_compute_save_reg_mask (void)
13858 unsigned long mask
;
13862 for (reg
= 0; reg
< 12; reg
++)
13863 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
13867 && !TARGET_SINGLE_PIC_BASE
13868 && arm_pic_register
!= INVALID_REGNUM
13869 && crtl
->uses_pic_offset_table
)
13870 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
13872 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
13873 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
13874 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
13876 /* LR will also be pushed if any lo regs are pushed. */
13877 if (mask
& 0xff || thumb_force_lr_save ())
13878 mask
|= (1 << LR_REGNUM
);
13880 /* Make sure we have a low work register if we need one.
13881 We will need one if we are going to push a high register,
13882 but we are not currently intending to push a low register. */
13883 if ((mask
& 0xff) == 0
13884 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
13886 /* Use thumb_find_work_register to choose which register
13887 we will use. If the register is live then we will
13888 have to push it. Use LAST_LO_REGNUM as our fallback
13889 choice for the register to select. */
13890 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
13891 /* Make sure the register returned by thumb_find_work_register is
13892 not part of the return value. */
13893 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
13894 reg
= LAST_LO_REGNUM
;
13896 if (! call_used_regs
[reg
])
13900 /* The 504 below is 8 bytes less than 512 because there are two possible
13901 alignment words. We can't tell here if they will be present or not so we
13902 have to play it safe and assume that they are. */
13903 if ((CALLER_INTERWORKING_SLOT_SIZE
+
13904 ROUND_UP_WORD (get_frame_size ()) +
13905 crtl
->outgoing_args_size
) >= 504)
13907 /* This is the same as the code in thumb1_expand_prologue() which
13908 determines which register to use for stack decrement. */
13909 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
13910 if (mask
& (1 << reg
))
13913 if (reg
> LAST_LO_REGNUM
)
13915 /* Make sure we have a register available for stack decrement. */
13916 mask
|= 1 << LAST_LO_REGNUM
;
13924 /* Return the number of bytes required to save VFP registers. */
13926 arm_get_vfp_saved_size (void)
13928 unsigned int regno
;
13933 /* Space for saved VFP registers. */
13934 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
13937 for (regno
= FIRST_VFP_REGNUM
;
13938 regno
< LAST_VFP_REGNUM
;
13941 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
13942 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
13946 /* Workaround ARM10 VFPr1 bug. */
13947 if (count
== 2 && !arm_arch6
)
13949 saved
+= count
* 8;
13958 if (count
== 2 && !arm_arch6
)
13960 saved
+= count
* 8;
13967 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
13968 everything bar the final return instruction. */
13970 output_return_instruction (rtx operand
, int really_return
, int reverse
)
13972 char conditional
[10];
13975 unsigned long live_regs_mask
;
13976 unsigned long func_type
;
13977 arm_stack_offsets
*offsets
;
13979 func_type
= arm_current_func_type ();
13981 if (IS_NAKED (func_type
))
13984 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
13986 /* If this function was declared non-returning, and we have
13987 found a tail call, then we have to trust that the called
13988 function won't return. */
13993 /* Otherwise, trap an attempted return by aborting. */
13995 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
13997 assemble_external_libcall (ops
[1]);
13998 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
14004 gcc_assert (!cfun
->calls_alloca
|| really_return
);
14006 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
14008 cfun
->machine
->return_used_this_function
= 1;
14010 offsets
= arm_get_frame_offsets ();
14011 live_regs_mask
= offsets
->saved_regs_mask
;
14013 if (live_regs_mask
)
14015 const char * return_reg
;
14017 /* If we do not have any special requirements for function exit
14018 (e.g. interworking) then we can load the return address
14019 directly into the PC. Otherwise we must load it into LR. */
14021 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
14022 return_reg
= reg_names
[PC_REGNUM
];
14024 return_reg
= reg_names
[LR_REGNUM
];
14026 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
14028 /* There are three possible reasons for the IP register
14029 being saved. 1) a stack frame was created, in which case
14030 IP contains the old stack pointer, or 2) an ISR routine
14031 corrupted it, or 3) it was saved to align the stack on
14032 iWMMXt. In case 1, restore IP into SP, otherwise just
14034 if (frame_pointer_needed
)
14036 live_regs_mask
&= ~ (1 << IP_REGNUM
);
14037 live_regs_mask
|= (1 << SP_REGNUM
);
14040 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
14043 /* On some ARM architectures it is faster to use LDR rather than
14044 LDM to load a single register. On other architectures, the
14045 cost is the same. In 26 bit mode, or for exception handlers,
14046 we have to use LDM to load the PC so that the CPSR is also
14048 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14049 if (live_regs_mask
== (1U << reg
))
14052 if (reg
<= LAST_ARM_REGNUM
14053 && (reg
!= LR_REGNUM
14055 || ! IS_INTERRUPT (func_type
)))
14057 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
14058 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
14065 /* Generate the load multiple instruction to restore the
14066 registers. Note we can get here, even if
14067 frame_pointer_needed is true, but only if sp already
14068 points to the base of the saved core registers. */
14069 if (live_regs_mask
& (1 << SP_REGNUM
))
14071 unsigned HOST_WIDE_INT stack_adjust
;
14073 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
14074 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
14076 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
14077 if (TARGET_UNIFIED_ASM
)
14078 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
14080 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
14083 /* If we can't use ldmib (SA110 bug),
14084 then try to pop r3 instead. */
14086 live_regs_mask
|= 1 << 3;
14088 if (TARGET_UNIFIED_ASM
)
14089 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
14091 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
14095 if (TARGET_UNIFIED_ASM
)
14096 sprintf (instr
, "pop%s\t{", conditional
);
14098 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
14100 p
= instr
+ strlen (instr
);
14102 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
14103 if (live_regs_mask
& (1 << reg
))
14105 int l
= strlen (reg_names
[reg
]);
14111 memcpy (p
, ", ", 2);
14115 memcpy (p
, "%|", 2);
14116 memcpy (p
+ 2, reg_names
[reg
], l
);
14120 if (live_regs_mask
& (1 << LR_REGNUM
))
14122 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
14123 /* If returning from an interrupt, restore the CPSR. */
14124 if (IS_INTERRUPT (func_type
))
14131 output_asm_insn (instr
, & operand
);
14133 /* See if we need to generate an extra instruction to
14134 perform the actual function return. */
14136 && func_type
!= ARM_FT_INTERWORKED
14137 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
14139 /* The return has already been handled
14140 by loading the LR into the PC. */
14147 switch ((int) ARM_FUNC_TYPE (func_type
))
14151 /* ??? This is wrong for unified assembly syntax. */
14152 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
14155 case ARM_FT_INTERWORKED
:
14156 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14159 case ARM_FT_EXCEPTION
:
14160 /* ??? This is wrong for unified assembly syntax. */
14161 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
14165 /* Use bx if it's available. */
14166 if (arm_arch5
|| arm_arch4t
)
14167 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14169 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
14173 output_asm_insn (instr
, & operand
);
14179 /* Write the function name into the code section, directly preceding
14180 the function prologue.
14182 Code will be output similar to this:
14184 .ascii "arm_poke_function_name", 0
14187 .word 0xff000000 + (t1 - t0)
14188 arm_poke_function_name
14190 stmfd sp!, {fp, ip, lr, pc}
14193 When performing a stack backtrace, code can inspect the value
14194 of 'pc' stored at 'fp' + 0. If the trace function then looks
14195 at location pc - 12 and the top 8 bits are set, then we know
14196 that there is a function name embedded immediately preceding this
14197 location and has length ((pc[-3]) & 0xff000000).
14199 We assume that pc is declared as a pointer to an unsigned long.
14201 It is of no benefit to output the function name if we are assembling
14202 a leaf function. These function types will not contain a stack
14203 backtrace structure, therefore it is not possible to determine the
14206 arm_poke_function_name (FILE *stream
, const char *name
)
14208 unsigned long alignlength
;
14209 unsigned long length
;
14212 length
= strlen (name
) + 1;
14213 alignlength
= ROUND_UP_WORD (length
);
14215 ASM_OUTPUT_ASCII (stream
, name
, length
);
14216 ASM_OUTPUT_ALIGN (stream
, 2);
14217 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
14218 assemble_aligned_integer (UNITS_PER_WORD
, x
);
14221 /* Place some comments into the assembler stream
14222 describing the current function. */
14224 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
14226 unsigned long func_type
;
14230 thumb1_output_function_prologue (f
, frame_size
);
14234 /* Sanity check. */
14235 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
14237 func_type
= arm_current_func_type ();
14239 switch ((int) ARM_FUNC_TYPE (func_type
))
14242 case ARM_FT_NORMAL
:
14244 case ARM_FT_INTERWORKED
:
14245 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
14248 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
14251 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
14253 case ARM_FT_EXCEPTION
:
14254 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
14258 if (IS_NAKED (func_type
))
14259 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14261 if (IS_VOLATILE (func_type
))
14262 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
14264 if (IS_NESTED (func_type
))
14265 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
14266 if (IS_STACKALIGN (func_type
))
14267 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14269 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14271 crtl
->args
.pretend_args_size
, frame_size
);
14273 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14274 frame_pointer_needed
,
14275 cfun
->machine
->uses_anonymous_args
);
14277 if (cfun
->machine
->lr_save_eliminated
)
14278 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
14280 if (crtl
->calls_eh_return
)
14281 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
14286 arm_output_epilogue (rtx sibling
)
14289 unsigned long saved_regs_mask
;
14290 unsigned long func_type
;
14291 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14292 frame that is $fp + 4 for a non-variadic function. */
14293 int floats_offset
= 0;
14295 FILE * f
= asm_out_file
;
14296 unsigned int lrm_count
= 0;
14297 int really_return
= (sibling
== NULL
);
14299 arm_stack_offsets
*offsets
;
14301 /* If we have already generated the return instruction
14302 then it is futile to generate anything else. */
14303 if (use_return_insn (FALSE
, sibling
) &&
14304 (cfun
->machine
->return_used_this_function
!= 0))
14307 func_type
= arm_current_func_type ();
14309 if (IS_NAKED (func_type
))
14310 /* Naked functions don't have epilogues. */
14313 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14317 /* A volatile function should never return. Call abort. */
14318 op
= gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)" : "abort");
14319 assemble_external_libcall (op
);
14320 output_asm_insn ("bl\t%a0", &op
);
14325 /* If we are throwing an exception, then we really must be doing a
14326 return, so we can't tail-call. */
14327 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
14329 offsets
= arm_get_frame_offsets ();
14330 saved_regs_mask
= offsets
->saved_regs_mask
;
14333 lrm_count
= bit_count (saved_regs_mask
);
14335 floats_offset
= offsets
->saved_args
;
14336 /* Compute how far away the floats will be. */
14337 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14338 if (saved_regs_mask
& (1 << reg
))
14339 floats_offset
+= 4;
14341 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
14343 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14344 int vfp_offset
= offsets
->frame
;
14346 if (TARGET_FPA_EMU2
)
14348 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14349 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14351 floats_offset
+= 12;
14352 asm_fprintf (f
, "\tldfe\t%r, [%r, #-%d]\n",
14353 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14358 start_reg
= LAST_FPA_REGNUM
;
14360 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14362 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14364 floats_offset
+= 12;
14366 /* We can't unstack more than four registers at once. */
14367 if (start_reg
- reg
== 3)
14369 asm_fprintf (f
, "\tlfm\t%r, 4, [%r, #-%d]\n",
14370 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14371 start_reg
= reg
- 1;
14376 if (reg
!= start_reg
)
14377 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14378 reg
+ 1, start_reg
- reg
,
14379 FP_REGNUM
, floats_offset
- vfp_offset
);
14380 start_reg
= reg
- 1;
14384 /* Just in case the last register checked also needs unstacking. */
14385 if (reg
!= start_reg
)
14386 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14387 reg
+ 1, start_reg
- reg
,
14388 FP_REGNUM
, floats_offset
- vfp_offset
);
14391 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14395 /* The fldmd insns do not have base+offset addressing
14396 modes, so we use IP to hold the address. */
14397 saved_size
= arm_get_vfp_saved_size ();
14399 if (saved_size
> 0)
14401 floats_offset
+= saved_size
;
14402 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", IP_REGNUM
,
14403 FP_REGNUM
, floats_offset
- vfp_offset
);
14405 start_reg
= FIRST_VFP_REGNUM
;
14406 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
14408 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14409 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
14411 if (start_reg
!= reg
)
14412 vfp_output_fldmd (f
, IP_REGNUM
,
14413 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14414 (reg
- start_reg
) / 2);
14415 start_reg
= reg
+ 2;
14418 if (start_reg
!= reg
)
14419 vfp_output_fldmd (f
, IP_REGNUM
,
14420 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14421 (reg
- start_reg
) / 2);
14426 /* The frame pointer is guaranteed to be non-double-word aligned.
14427 This is because it is set to (old_stack_pointer - 4) and the
14428 old_stack_pointer was double word aligned. Thus the offset to
14429 the iWMMXt registers to be loaded must also be non-double-word
14430 sized, so that the resultant address *is* double-word aligned.
14431 We can ignore floats_offset since that was already included in
14432 the live_regs_mask. */
14433 lrm_count
+= (lrm_count
% 2 ? 2 : 1);
14435 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
14436 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14438 asm_fprintf (f
, "\twldrd\t%r, [%r, #-%d]\n",
14439 reg
, FP_REGNUM
, lrm_count
* 4);
14444 /* saved_regs_mask should contain the IP, which at the time of stack
14445 frame generation actually contains the old stack pointer. So a
14446 quick way to unwind the stack is just pop the IP register directly
14447 into the stack pointer. */
14448 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
14449 saved_regs_mask
&= ~ (1 << IP_REGNUM
);
14450 saved_regs_mask
|= (1 << SP_REGNUM
);
14452 /* There are two registers left in saved_regs_mask - LR and PC. We
14453 only need to restore the LR register (the return address), but to
14454 save time we can load it directly into the PC, unless we need a
14455 special function exit sequence, or we are not really returning. */
14457 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
14458 && !crtl
->calls_eh_return
)
14459 /* Delete the LR from the register mask, so that the LR on
14460 the stack is loaded into the PC in the register mask. */
14461 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
14463 saved_regs_mask
&= ~ (1 << PC_REGNUM
);
14465 /* We must use SP as the base register, because SP is one of the
14466 registers being restored. If an interrupt or page fault
14467 happens in the ldm instruction, the SP might or might not
14468 have been restored. That would be bad, as then SP will no
14469 longer indicate the safe area of stack, and we can get stack
14470 corruption. Using SP as the base register means that it will
14471 be reset correctly to the original value, should an interrupt
14472 occur. If the stack pointer already points at the right
14473 place, then omit the subtraction. */
14474 if (offsets
->outgoing_args
!= (1 + (int) bit_count (saved_regs_mask
))
14475 || cfun
->calls_alloca
)
14476 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", SP_REGNUM
, FP_REGNUM
,
14477 4 * bit_count (saved_regs_mask
));
14478 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
, 0);
14480 if (IS_INTERRUPT (func_type
))
14481 /* Interrupt handlers will have pushed the
14482 IP onto the stack, so restore it now. */
14483 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, 1 << IP_REGNUM
, 0);
14487 /* This branch is executed for ARM mode (non-apcs frames) and
14488 Thumb-2 mode. Frame layout is essentially the same for those
14489 cases, except that in ARM mode frame pointer points to the
14490 first saved register, while in Thumb-2 mode the frame pointer points
14491 to the last saved register.
14493 It is possible to make frame pointer point to last saved
14494 register in both cases, and remove some conditionals below.
14495 That means that fp setup in prologue would be just "mov fp, sp"
14496 and sp restore in epilogue would be just "mov sp, fp", whereas
14497 now we have to use add/sub in those cases. However, the value
14498 of that would be marginal, as both mov and add/sub are 32-bit
14499 in ARM mode, and it would require extra conditionals
14500 in arm_expand_prologue to distingish ARM-apcs-frame case
14501 (where frame pointer is required to point at first register)
14502 and ARM-non-apcs-frame. Therefore, such change is postponed
14503 until real need arise. */
14504 unsigned HOST_WIDE_INT amount
;
14506 /* Restore stack pointer if necessary. */
14507 if (TARGET_ARM
&& frame_pointer_needed
)
14509 operands
[0] = stack_pointer_rtx
;
14510 operands
[1] = hard_frame_pointer_rtx
;
14512 operands
[2] = GEN_INT (offsets
->frame
- offsets
->saved_regs
);
14513 output_add_immediate (operands
);
14517 if (frame_pointer_needed
)
14519 /* For Thumb-2 restore sp from the frame pointer.
14520 Operand restrictions mean we have to incrememnt FP, then copy
14522 amount
= offsets
->locals_base
- offsets
->saved_regs
;
14523 operands
[0] = hard_frame_pointer_rtx
;
14527 unsigned long count
;
14528 operands
[0] = stack_pointer_rtx
;
14529 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
14530 /* pop call clobbered registers if it avoids a
14531 separate stack adjustment. */
14532 count
= offsets
->saved_regs
- offsets
->saved_args
;
14535 && !crtl
->calls_eh_return
14536 && bit_count(saved_regs_mask
) * 4 == count
14537 && !IS_INTERRUPT (func_type
)
14538 && !crtl
->tail_call_emit
)
14540 unsigned long mask
;
14541 /* Preserve return values, of any size. */
14542 mask
= (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
14544 mask
&= ~saved_regs_mask
;
14546 while (bit_count (mask
) * 4 > amount
)
14548 while ((mask
& (1 << reg
)) == 0)
14550 mask
&= ~(1 << reg
);
14552 if (bit_count (mask
) * 4 == amount
) {
14554 saved_regs_mask
|= mask
;
14561 operands
[1] = operands
[0];
14562 operands
[2] = GEN_INT (amount
);
14563 output_add_immediate (operands
);
14565 if (frame_pointer_needed
)
14566 asm_fprintf (f
, "\tmov\t%r, %r\n",
14567 SP_REGNUM
, HARD_FRAME_POINTER_REGNUM
);
14570 if (TARGET_FPA_EMU2
)
14572 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
14573 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14574 asm_fprintf (f
, "\tldfe\t%r, [%r], #12\n",
14579 start_reg
= FIRST_FPA_REGNUM
;
14581 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
14583 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14585 if (reg
- start_reg
== 3)
14587 asm_fprintf (f
, "\tlfmfd\t%r, 4, [%r]!\n",
14588 start_reg
, SP_REGNUM
);
14589 start_reg
= reg
+ 1;
14594 if (reg
!= start_reg
)
14595 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
14596 start_reg
, reg
- start_reg
,
14599 start_reg
= reg
+ 1;
14603 /* Just in case the last register checked also needs unstacking. */
14604 if (reg
!= start_reg
)
14605 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
14606 start_reg
, reg
- start_reg
, SP_REGNUM
);
14609 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14611 int end_reg
= LAST_VFP_REGNUM
+ 1;
14613 /* Scan the registers in reverse order. We need to match
14614 any groupings made in the prologue and generate matching
14616 for (reg
= LAST_VFP_REGNUM
- 1; reg
>= FIRST_VFP_REGNUM
; reg
-= 2)
14618 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14619 && (!df_regs_ever_live_p (reg
+ 1)
14620 || call_used_regs
[reg
+ 1]))
14622 if (end_reg
> reg
+ 2)
14623 vfp_output_fldmd (f
, SP_REGNUM
,
14624 (reg
+ 2 - FIRST_VFP_REGNUM
) / 2,
14625 (end_reg
- (reg
+ 2)) / 2);
14629 if (end_reg
> reg
+ 2)
14630 vfp_output_fldmd (f
, SP_REGNUM
, 0,
14631 (end_reg
- (reg
+ 2)) / 2);
14635 for (reg
= FIRST_IWMMXT_REGNUM
; reg
<= LAST_IWMMXT_REGNUM
; reg
++)
14636 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14637 asm_fprintf (f
, "\twldrd\t%r, [%r], #8\n", reg
, SP_REGNUM
);
14639 /* If we can, restore the LR into the PC. */
14640 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
14641 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
14642 && !IS_STACKALIGN (func_type
)
14644 && crtl
->args
.pretend_args_size
== 0
14645 && saved_regs_mask
& (1 << LR_REGNUM
)
14646 && !crtl
->calls_eh_return
)
14648 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
14649 saved_regs_mask
|= (1 << PC_REGNUM
);
14650 rfe
= IS_INTERRUPT (func_type
);
14655 /* Load the registers off the stack. If we only have one register
14656 to load use the LDR instruction - it is faster. For Thumb-2
14657 always use pop and the assembler will pick the best instruction.*/
14658 if (TARGET_ARM
&& saved_regs_mask
== (1 << LR_REGNUM
)
14659 && !IS_INTERRUPT(func_type
))
14661 asm_fprintf (f
, "\tldr\t%r, [%r], #4\n", LR_REGNUM
, SP_REGNUM
);
14663 else if (saved_regs_mask
)
14665 if (saved_regs_mask
& (1 << SP_REGNUM
))
14666 /* Note - write back to the stack register is not enabled
14667 (i.e. "ldmfd sp!..."). We know that the stack pointer is
14668 in the list of registers and if we add writeback the
14669 instruction becomes UNPREDICTABLE. */
14670 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
,
14672 else if (TARGET_ARM
)
14673 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, saved_regs_mask
,
14676 print_multi_reg (f
, "pop\t", SP_REGNUM
, saved_regs_mask
, 0);
14679 if (crtl
->args
.pretend_args_size
)
14681 /* Unwind the pre-pushed regs. */
14682 operands
[0] = operands
[1] = stack_pointer_rtx
;
14683 operands
[2] = GEN_INT (crtl
->args
.pretend_args_size
);
14684 output_add_immediate (operands
);
14688 /* We may have already restored PC directly from the stack. */
14689 if (!really_return
|| saved_regs_mask
& (1 << PC_REGNUM
))
14692 /* Stack adjustment for exception handler. */
14693 if (crtl
->calls_eh_return
)
14694 asm_fprintf (f
, "\tadd\t%r, %r, %r\n", SP_REGNUM
, SP_REGNUM
,
14695 ARM_EH_STACKADJ_REGNUM
);
14697 /* Generate the return instruction. */
14698 switch ((int) ARM_FUNC_TYPE (func_type
))
14702 asm_fprintf (f
, "\tsubs\t%r, %r, #4\n", PC_REGNUM
, LR_REGNUM
);
14705 case ARM_FT_EXCEPTION
:
14706 asm_fprintf (f
, "\tmovs\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
14709 case ARM_FT_INTERWORKED
:
14710 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
14714 if (IS_STACKALIGN (func_type
))
14716 /* See comment in arm_expand_prologue. */
14717 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, 0);
14719 if (arm_arch5
|| arm_arch4t
)
14720 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
14722 asm_fprintf (f
, "\tmov\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
14730 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
14731 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
14733 arm_stack_offsets
*offsets
;
14739 /* Emit any call-via-reg trampolines that are needed for v4t support
14740 of call_reg and call_value_reg type insns. */
14741 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
14743 rtx label
= cfun
->machine
->call_via
[regno
];
14747 switch_to_section (function_section (current_function_decl
));
14748 targetm
.asm_out
.internal_label (asm_out_file
, "L",
14749 CODE_LABEL_NUMBER (label
));
14750 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
14754 /* ??? Probably not safe to set this here, since it assumes that a
14755 function will be emitted as assembly immediately after we generate
14756 RTL for it. This does not happen for inline functions. */
14757 cfun
->machine
->return_used_this_function
= 0;
14759 else /* TARGET_32BIT */
14761 /* We need to take into account any stack-frame rounding. */
14762 offsets
= arm_get_frame_offsets ();
14764 gcc_assert (!use_return_insn (FALSE
, NULL
)
14765 || (cfun
->machine
->return_used_this_function
!= 0)
14766 || offsets
->saved_regs
== offsets
->outgoing_args
14767 || frame_pointer_needed
);
14769 /* Reset the ARM-specific per-function variables. */
14770 after_arm_reorg
= 0;
14774 /* Generate and emit an insn that we will recognize as a push_multi.
14775 Unfortunately, since this insn does not reflect very well the actual
14776 semantics of the operation, we need to annotate the insn for the benefit
14777 of DWARF2 frame unwind information. */
14779 emit_multi_reg_push (unsigned long mask
)
14782 int num_dwarf_regs
;
14786 int dwarf_par_index
;
14789 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
14790 if (mask
& (1 << i
))
14793 gcc_assert (num_regs
&& num_regs
<= 16);
14795 /* We don't record the PC in the dwarf frame information. */
14796 num_dwarf_regs
= num_regs
;
14797 if (mask
& (1 << PC_REGNUM
))
14800 /* For the body of the insn we are going to generate an UNSPEC in
14801 parallel with several USEs. This allows the insn to be recognized
14802 by the push_multi pattern in the arm.md file.
14804 The body of the insn looks something like this:
14807 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14808 (const_int:SI <num>)))
14809 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
14815 For the frame note however, we try to be more explicit and actually
14816 show each register being stored into the stack frame, plus a (single)
14817 decrement of the stack pointer. We do it this way in order to be
14818 friendly to the stack unwinding code, which only wants to see a single
14819 stack decrement per instruction. The RTL we generate for the note looks
14820 something like this:
14823 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
14824 (set (mem:SI (reg:SI sp)) (reg:SI r4))
14825 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
14826 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
14830 FIXME:: In an ideal world the PRE_MODIFY would not exist and
14831 instead we'd have a parallel expression detailing all
14832 the stores to the various memory addresses so that debug
14833 information is more up-to-date. Remember however while writing
14834 this to take care of the constraints with the push instruction.
14836 Note also that this has to be taken care of for the VFP registers.
14838 For more see PR43399. */
14840 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
14841 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
14842 dwarf_par_index
= 1;
14844 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
14846 if (mask
& (1 << i
))
14848 reg
= gen_rtx_REG (SImode
, i
);
14850 XVECEXP (par
, 0, 0)
14851 = gen_rtx_SET (VOIDmode
,
14854 gen_rtx_PRE_MODIFY (Pmode
,
14857 (stack_pointer_rtx
,
14860 gen_rtx_UNSPEC (BLKmode
,
14861 gen_rtvec (1, reg
),
14862 UNSPEC_PUSH_MULT
));
14864 if (i
!= PC_REGNUM
)
14866 tmp
= gen_rtx_SET (VOIDmode
,
14867 gen_frame_mem (SImode
, stack_pointer_rtx
),
14869 RTX_FRAME_RELATED_P (tmp
) = 1;
14870 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
14878 for (j
= 1, i
++; j
< num_regs
; i
++)
14880 if (mask
& (1 << i
))
14882 reg
= gen_rtx_REG (SImode
, i
);
14884 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
14886 if (i
!= PC_REGNUM
)
14889 = gen_rtx_SET (VOIDmode
,
14892 plus_constant (stack_pointer_rtx
,
14895 RTX_FRAME_RELATED_P (tmp
) = 1;
14896 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
14903 par
= emit_insn (par
);
14905 tmp
= gen_rtx_SET (VOIDmode
,
14907 plus_constant (stack_pointer_rtx
, -4 * num_regs
));
14908 RTX_FRAME_RELATED_P (tmp
) = 1;
14909 XVECEXP (dwarf
, 0, 0) = tmp
;
14911 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
14916 /* Calculate the size of the return value that is passed in registers. */
14918 arm_size_return_regs (void)
14920 enum machine_mode mode
;
14922 if (crtl
->return_rtx
!= 0)
14923 mode
= GET_MODE (crtl
->return_rtx
);
14925 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
14927 return GET_MODE_SIZE (mode
);
14931 emit_sfm (int base_reg
, int count
)
14938 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
14939 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
14941 reg
= gen_rtx_REG (XFmode
, base_reg
++);
14943 XVECEXP (par
, 0, 0)
14944 = gen_rtx_SET (VOIDmode
,
14947 gen_rtx_PRE_MODIFY (Pmode
,
14950 (stack_pointer_rtx
,
14953 gen_rtx_UNSPEC (BLKmode
,
14954 gen_rtvec (1, reg
),
14955 UNSPEC_PUSH_MULT
));
14956 tmp
= gen_rtx_SET (VOIDmode
,
14957 gen_frame_mem (XFmode
, stack_pointer_rtx
), reg
);
14958 RTX_FRAME_RELATED_P (tmp
) = 1;
14959 XVECEXP (dwarf
, 0, 1) = tmp
;
14961 for (i
= 1; i
< count
; i
++)
14963 reg
= gen_rtx_REG (XFmode
, base_reg
++);
14964 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
14966 tmp
= gen_rtx_SET (VOIDmode
,
14967 gen_frame_mem (XFmode
,
14968 plus_constant (stack_pointer_rtx
,
14971 RTX_FRAME_RELATED_P (tmp
) = 1;
14972 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
14975 tmp
= gen_rtx_SET (VOIDmode
,
14977 plus_constant (stack_pointer_rtx
, -12 * count
));
14979 RTX_FRAME_RELATED_P (tmp
) = 1;
14980 XVECEXP (dwarf
, 0, 0) = tmp
;
14982 par
= emit_insn (par
);
14983 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
14989 /* Return true if the current function needs to save/restore LR. */
14992 thumb_force_lr_save (void)
14994 return !cfun
->machine
->lr_save_eliminated
14995 && (!leaf_function_p ()
14996 || thumb_far_jump_used_p ()
14997 || df_regs_ever_live_p (LR_REGNUM
));
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
15050 /* Calculate stack offsets. These are used to calculate register elimination
15051 offsets and in prologue/epilogue code. Also calculates which registers
15052 should be saved. */
15054 static arm_stack_offsets
*
15055 arm_get_frame_offsets (void)
15057 struct arm_stack_offsets
*offsets
;
15058 unsigned long func_type
;
15062 HOST_WIDE_INT frame_size
;
15065 offsets
= &cfun
->machine
->stack_offsets
;
15067 /* We need to know if we are a leaf function. Unfortunately, it
15068 is possible to be called after start_sequence has been called,
15069 which causes get_insns to return the insns for the sequence,
15070 not the function, which will cause leaf_function_p to return
15071 the incorrect result.
15073 to know about leaf functions once reload has completed, and the
15074 frame size cannot be changed after that time, so we can safely
15075 use the cached value. */
15077 if (reload_completed
)
15080 /* Initially this is the size of the local variables. It will translated
15081 into an offset once we have determined the size of preceding data. */
15082 frame_size
= ROUND_UP_WORD (get_frame_size ());
15084 leaf
= leaf_function_p ();
15086 /* Space for variadic functions. */
15087 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
15089 /* In Thumb mode this is incorrect, but never used. */
15090 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
15091 arm_compute_static_chain_stack_bytes();
15095 unsigned int regno
;
15097 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
15098 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15099 saved
= core_saved
;
15101 /* We know that SP will be doubleword aligned on entry, and we must
15102 preserve that condition at any subroutine call. We also require the
15103 soft frame pointer to be doubleword aligned. */
15105 if (TARGET_REALLY_IWMMXT
)
15107 /* Check for the call-saved iWMMXt registers. */
15108 for (regno
= FIRST_IWMMXT_REGNUM
;
15109 regno
<= LAST_IWMMXT_REGNUM
;
15111 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15115 func_type
= arm_current_func_type ();
15116 if (! IS_VOLATILE (func_type
))
15118 /* Space for saved FPA registers. */
15119 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
15120 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15123 /* Space for saved VFP registers. */
15124 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15125 saved
+= arm_get_vfp_saved_size ();
15128 else /* TARGET_THUMB1 */
15130 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
15131 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15132 saved
= core_saved
;
15133 if (TARGET_BACKTRACE
)
15137 /* Saved registers include the stack frame. */
15138 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
15139 arm_compute_static_chain_stack_bytes();
15140 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
15141 /* A leaf function does not need any stack alignment if it has nothing
15143 if (leaf
&& frame_size
== 0)
15145 offsets
->outgoing_args
= offsets
->soft_frame
;
15146 offsets
->locals_base
= offsets
->soft_frame
;
15150 /* Ensure SFP has the correct alignment. */
15151 if (ARM_DOUBLEWORD_ALIGN
15152 && (offsets
->soft_frame
& 7))
15154 offsets
->soft_frame
+= 4;
15155 /* Try to align stack by pushing an extra reg. Don't bother doing this
15156 when there is a stack frame as the alignment will be rolled into
15157 the normal stack adjustment. */
15158 if (frame_size
+ crtl
->outgoing_args_size
== 0)
15162 /* If it is safe to use r3, then do so. This sometimes
15163 generates better code on Thumb-2 by avoiding the need to
15164 use 32-bit push/pop instructions. */
15165 if (!crtl
->tail_call_emit
15166 && arm_size_return_regs () <= 12
15167 && (offsets
->saved_regs_mask
& (1 << 3)) == 0)
15172 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
15174 if ((offsets
->saved_regs_mask
& (1 << i
)) == 0)
15183 offsets
->saved_regs
+= 4;
15184 offsets
->saved_regs_mask
|= (1 << reg
);
15189 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
15190 offsets
->outgoing_args
= (offsets
->locals_base
15191 + crtl
->outgoing_args_size
);
15193 if (ARM_DOUBLEWORD_ALIGN
)
15195 /* Ensure SP remains doubleword aligned. */
15196 if (offsets
->outgoing_args
& 7)
15197 offsets
->outgoing_args
+= 4;
15198 gcc_assert (!(offsets
->outgoing_args
& 7));
15205 /* Calculate the relative offsets for the different stack pointers. Positive
15206 offsets are in the direction of stack growth. */
15209 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
15211 arm_stack_offsets
*offsets
;
15213 offsets
= arm_get_frame_offsets ();
15215 /* OK, now we have enough information to compute the distances.
15216 There must be an entry in these switch tables for each pair
15217 of registers in ELIMINABLE_REGS, even if some of the entries
15218 seem to be redundant or useless. */
15221 case ARG_POINTER_REGNUM
:
15224 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15227 case FRAME_POINTER_REGNUM
:
15228 /* This is the reverse of the soft frame pointer
15229 to hard frame pointer elimination below. */
15230 return offsets
->soft_frame
- offsets
->saved_args
;
15232 case ARM_HARD_FRAME_POINTER_REGNUM
:
15233 /* This is only non-zero in the case where the static chain register
15234 is stored above the frame. */
15235 return offsets
->frame
- offsets
->saved_args
- 4;
15237 case STACK_POINTER_REGNUM
:
15238 /* If nothing has been pushed on the stack at all
15239 then this will return -4. This *is* correct! */
15240 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
15243 gcc_unreachable ();
15245 gcc_unreachable ();
15247 case FRAME_POINTER_REGNUM
:
15250 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15253 case ARM_HARD_FRAME_POINTER_REGNUM
:
15254 /* The hard frame pointer points to the top entry in the
15255 stack frame. The soft frame pointer to the bottom entry
15256 in the stack frame. If there is no stack frame at all,
15257 then they are identical. */
15259 return offsets
->frame
- offsets
->soft_frame
;
15261 case STACK_POINTER_REGNUM
:
15262 return offsets
->outgoing_args
- offsets
->soft_frame
;
15265 gcc_unreachable ();
15267 gcc_unreachable ();
15270 /* You cannot eliminate from the stack pointer.
15271 In theory you could eliminate from the hard frame
15272 pointer to the stack pointer, but this will never
15273 happen, since if a stack frame is not needed the
15274 hard frame pointer will never be used. */
15275 gcc_unreachable ();
15279 /* Given FROM and TO register numbers, say whether this elimination is
15280 allowed. Frame pointer elimination is automatically handled.
15282 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15283 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15284 pointer, we must eliminate FRAME_POINTER_REGNUM into
15285 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15286 ARG_POINTER_REGNUM. */
15289 arm_can_eliminate (const int from
, const int to
)
15291 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
15292 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
15293 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
15294 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
15298 /* Emit RTL to save coprocessor registers on function entry. Returns the
15299 number of bytes pushed. */
15302 arm_save_coproc_regs(void)
15304 int saved_size
= 0;
15306 unsigned start_reg
;
15309 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
15310 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
15312 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15313 insn
= gen_rtx_MEM (V2SImode
, insn
);
15314 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
15315 RTX_FRAME_RELATED_P (insn
) = 1;
15319 /* Save any floating point call-saved registers used by this
15321 if (TARGET_FPA_EMU2
)
15323 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15324 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15326 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15327 insn
= gen_rtx_MEM (XFmode
, insn
);
15328 insn
= emit_set_insn (insn
, gen_rtx_REG (XFmode
, reg
));
15329 RTX_FRAME_RELATED_P (insn
) = 1;
15335 start_reg
= LAST_FPA_REGNUM
;
15337 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15339 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15341 if (start_reg
- reg
== 3)
15343 insn
= emit_sfm (reg
, 4);
15344 RTX_FRAME_RELATED_P (insn
) = 1;
15346 start_reg
= reg
- 1;
15351 if (start_reg
!= reg
)
15353 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15354 RTX_FRAME_RELATED_P (insn
) = 1;
15355 saved_size
+= (start_reg
- reg
) * 12;
15357 start_reg
= reg
- 1;
15361 if (start_reg
!= reg
)
15363 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15364 saved_size
+= (start_reg
- reg
) * 12;
15365 RTX_FRAME_RELATED_P (insn
) = 1;
15368 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15370 start_reg
= FIRST_VFP_REGNUM
;
15372 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
15374 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
15375 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
15377 if (start_reg
!= reg
)
15378 saved_size
+= vfp_emit_fstmd (start_reg
,
15379 (reg
- start_reg
) / 2);
15380 start_reg
= reg
+ 2;
15383 if (start_reg
!= reg
)
15384 saved_size
+= vfp_emit_fstmd (start_reg
,
15385 (reg
- start_reg
) / 2);
15391 /* Set the Thumb frame pointer from the stack pointer. */
15394 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
15396 HOST_WIDE_INT amount
;
15399 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
15401 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15402 stack_pointer_rtx
, GEN_INT (amount
)));
15405 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
15406 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15407 expects the first two operands to be the same. */
15410 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15412 hard_frame_pointer_rtx
));
15416 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15417 hard_frame_pointer_rtx
,
15418 stack_pointer_rtx
));
15420 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
15421 plus_constant (stack_pointer_rtx
, amount
));
15422 RTX_FRAME_RELATED_P (dwarf
) = 1;
15423 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15426 RTX_FRAME_RELATED_P (insn
) = 1;
15429 /* Generate the prologue instructions for entry into an ARM or Thumb-2
15432 arm_expand_prologue (void)
15437 unsigned long live_regs_mask
;
15438 unsigned long func_type
;
15440 int saved_pretend_args
= 0;
15441 int saved_regs
= 0;
15442 unsigned HOST_WIDE_INT args_to_push
;
15443 arm_stack_offsets
*offsets
;
15445 func_type
= arm_current_func_type ();
15447 /* Naked functions don't have prologues. */
15448 if (IS_NAKED (func_type
))
15451 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
15452 args_to_push
= crtl
->args
.pretend_args_size
;
15454 /* Compute which register we will have to save onto the stack. */
15455 offsets
= arm_get_frame_offsets ();
15456 live_regs_mask
= offsets
->saved_regs_mask
;
15458 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
15460 if (IS_STACKALIGN (func_type
))
15465 /* Handle a word-aligned stack pointer. We generate the following:
15470 <save and restore r0 in normal prologue/epilogue>
15474 The unwinder doesn't need to know about the stack realignment.
15475 Just tell it we saved SP in r0. */
15476 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
15478 r0
= gen_rtx_REG (SImode
, 0);
15479 r1
= gen_rtx_REG (SImode
, 1);
15480 /* Use a real rtvec rather than NULL_RTVEC so the rest of the
15481 compiler won't choke. */
15482 dwarf
= gen_rtx_UNSPEC (SImode
, rtvec_alloc (0), UNSPEC_STACK_ALIGN
);
15483 dwarf
= gen_rtx_SET (VOIDmode
, r0
, dwarf
);
15484 insn
= gen_movsi (r0
, stack_pointer_rtx
);
15485 RTX_FRAME_RELATED_P (insn
) = 1;
15486 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15488 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
15489 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
15492 /* For APCS frames, if IP register is clobbered
15493 when creating frame, save that register in a special
15495 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
15497 if (IS_INTERRUPT (func_type
))
15499 /* Interrupt functions must not corrupt any registers.
15500 Creating a frame pointer however, corrupts the IP
15501 register, so we must push it first. */
15502 insn
= emit_multi_reg_push (1 << IP_REGNUM
);
15504 /* Do not set RTX_FRAME_RELATED_P on this insn.
15505 The dwarf stack unwinding code only wants to see one
15506 stack decrement per function, and this is not it. If
15507 this instruction is labeled as being part of the frame
15508 creation sequence then dwarf2out_frame_debug_expr will
15509 die when it encounters the assignment of IP to FP
15510 later on, since the use of SP here establishes SP as
15511 the CFA register and not IP.
15513 Anyway this instruction is not really part of the stack
15514 frame creation although it is part of the prologue. */
15516 else if (IS_NESTED (func_type
))
15518 /* The Static chain register is the same as the IP register
15519 used as a scratch register during stack frame creation.
15520 To get around this need to find somewhere to store IP
15521 whilst the frame is being created. We try the following
15524 1. The last argument register.
15525 2. A slot on the stack above the frame. (This only
15526 works if the function is not a varargs function).
15527 3. Register r3, after pushing the argument registers
15530 Note - we only need to tell the dwarf2 backend about the SP
15531 adjustment in the second variant; the static chain register
15532 doesn't need to be unwound, as it doesn't contain a value
15533 inherited from the caller. */
15535 if (df_regs_ever_live_p (3) == false)
15536 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
15537 else if (args_to_push
== 0)
15541 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
15544 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
15545 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
15548 /* Just tell the dwarf backend that we adjusted SP. */
15549 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
15550 plus_constant (stack_pointer_rtx
,
15552 RTX_FRAME_RELATED_P (insn
) = 1;
15553 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15557 /* Store the args on the stack. */
15558 if (cfun
->machine
->uses_anonymous_args
)
15559 insn
= emit_multi_reg_push
15560 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
15563 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15564 GEN_INT (- args_to_push
)));
15566 RTX_FRAME_RELATED_P (insn
) = 1;
15568 saved_pretend_args
= 1;
15569 fp_offset
= args_to_push
;
15572 /* Now reuse r3 to preserve IP. */
15573 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
15577 insn
= emit_set_insn (ip_rtx
,
15578 plus_constant (stack_pointer_rtx
, fp_offset
));
15579 RTX_FRAME_RELATED_P (insn
) = 1;
15584 /* Push the argument registers, or reserve space for them. */
15585 if (cfun
->machine
->uses_anonymous_args
)
15586 insn
= emit_multi_reg_push
15587 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
15590 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15591 GEN_INT (- args_to_push
)));
15592 RTX_FRAME_RELATED_P (insn
) = 1;
15595 /* If this is an interrupt service routine, and the link register
15596 is going to be pushed, and we're not generating extra
15597 push of IP (needed when frame is needed and frame layout if apcs),
15598 subtracting four from LR now will mean that the function return
15599 can be done with a single instruction. */
15600 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
15601 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
15602 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
15605 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
15607 emit_set_insn (lr
, plus_constant (lr
, -4));
15610 if (live_regs_mask
)
15612 saved_regs
+= bit_count (live_regs_mask
) * 4;
15613 if (optimize_size
&& !frame_pointer_needed
15614 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
15616 /* If no coprocessor registers are being pushed and we don't have
15617 to worry about a frame pointer then push extra registers to
15618 create the stack frame. This is done is a way that does not
15619 alter the frame layout, so is independent of the epilogue. */
15623 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
15625 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
15626 if (frame
&& n
* 4 >= frame
)
15629 live_regs_mask
|= (1 << n
) - 1;
15630 saved_regs
+= frame
;
15633 insn
= emit_multi_reg_push (live_regs_mask
);
15634 RTX_FRAME_RELATED_P (insn
) = 1;
15637 if (! IS_VOLATILE (func_type
))
15638 saved_regs
+= arm_save_coproc_regs ();
15640 if (frame_pointer_needed
&& TARGET_ARM
)
15642 /* Create the new frame pointer. */
15643 if (TARGET_APCS_FRAME
)
15645 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
15646 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
15647 RTX_FRAME_RELATED_P (insn
) = 1;
15649 if (IS_NESTED (func_type
))
15651 /* Recover the static chain register. */
15652 if (!df_regs_ever_live_p (3)
15653 || saved_pretend_args
)
15654 insn
= gen_rtx_REG (SImode
, 3);
15655 else /* if (crtl->args.pretend_args_size == 0) */
15657 insn
= plus_constant (hard_frame_pointer_rtx
, 4);
15658 insn
= gen_frame_mem (SImode
, insn
);
15660 emit_set_insn (ip_rtx
, insn
);
15661 /* Add a USE to stop propagate_one_insn() from barfing. */
15662 emit_insn (gen_prologue_use (ip_rtx
));
15667 insn
= GEN_INT (saved_regs
- 4);
15668 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15669 stack_pointer_rtx
, insn
));
15670 RTX_FRAME_RELATED_P (insn
) = 1;
15674 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
15676 /* This add can produce multiple insns for a large constant, so we
15677 need to get tricky. */
15678 rtx last
= get_last_insn ();
15680 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
15681 - offsets
->outgoing_args
);
15683 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
15687 last
= last
? NEXT_INSN (last
) : get_insns ();
15688 RTX_FRAME_RELATED_P (last
) = 1;
15690 while (last
!= insn
);
15692 /* If the frame pointer is needed, emit a special barrier that
15693 will prevent the scheduler from moving stores to the frame
15694 before the stack adjustment. */
15695 if (frame_pointer_needed
)
15696 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
15697 hard_frame_pointer_rtx
));
15701 if (frame_pointer_needed
&& TARGET_THUMB2
)
15702 thumb_set_frame_pointer (offsets
);
15704 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
15706 unsigned long mask
;
15708 mask
= live_regs_mask
;
15709 mask
&= THUMB2_WORK_REGS
;
15710 if (!IS_NESTED (func_type
))
15711 mask
|= (1 << IP_REGNUM
);
15712 arm_load_pic_register (mask
);
15715 /* If we are profiling, make sure no instructions are scheduled before
15716 the call to mcount. Similarly if the user has requested no
15717 scheduling in the prolog. Similarly if we want non-call exceptions
15718 using the EABI unwinder, to prevent faulting instructions from being
15719 swapped with a stack adjustment. */
15720 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
15721 || (ARM_EABI_UNWIND_TABLES
&& cfun
->can_throw_non_call_exceptions
))
15722 emit_insn (gen_blockage ());
15724 /* If the link register is being kept alive, with the return address in it,
15725 then make sure that it does not get reused by the ce2 pass. */
15726 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
15727 cfun
->machine
->lr_save_eliminated
= 1;
15730 /* Print condition code to STREAM. Helper function for arm_print_operand. */
15732 arm_print_condition (FILE *stream
)
15734 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
15736 /* Branch conversion is not implemented for Thumb-2. */
15739 output_operand_lossage ("predicated Thumb instruction");
15742 if (current_insn_predicate
!= NULL
)
15744 output_operand_lossage
15745 ("predicated instruction in conditional sequence");
15749 fputs (arm_condition_codes
[arm_current_cc
], stream
);
15751 else if (current_insn_predicate
)
15753 enum arm_cond_code code
;
15757 output_operand_lossage ("predicated Thumb instruction");
15761 code
= get_arm_condition_code (current_insn_predicate
);
15762 fputs (arm_condition_codes
[code
], stream
);
15767 /* If CODE is 'd', then the X is a condition operand and the instruction
15768 should only be executed if the condition is true.
15769 if CODE is 'D', then the X is a condition operand and the instruction
15770 should only be executed if the condition is false: however, if the mode
15771 of the comparison is CCFPEmode, then always execute the instruction -- we
15772 do this because in these circumstances !GE does not necessarily imply LT;
15773 in these cases the instruction pattern will take care to make sure that
15774 an instruction containing %d will follow, thereby undoing the effects of
15775 doing this instruction unconditionally.
15776 If CODE is 'N' then X is a floating point operand that must be negated
15778 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15779 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15781 arm_print_operand (FILE *stream
, rtx x
, int code
)
15786 fputs (ASM_COMMENT_START
, stream
);
15790 fputs (user_label_prefix
, stream
);
15794 fputs (REGISTER_PREFIX
, stream
);
15798 arm_print_condition (stream
);
15802 /* Nothing in unified syntax, otherwise the current condition code. */
15803 if (!TARGET_UNIFIED_ASM
)
15804 arm_print_condition (stream
);
15808 /* The current condition code in unified syntax, otherwise nothing. */
15809 if (TARGET_UNIFIED_ASM
)
15810 arm_print_condition (stream
);
15814 /* The current condition code for a condition code setting instruction.
15815 Preceded by 's' in unified syntax, otherwise followed by 's'. */
15816 if (TARGET_UNIFIED_ASM
)
15818 fputc('s', stream
);
15819 arm_print_condition (stream
);
15823 arm_print_condition (stream
);
15824 fputc('s', stream
);
15829 /* If the instruction is conditionally executed then print
15830 the current condition code, otherwise print 's'. */
15831 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
15832 if (current_insn_predicate
)
15833 arm_print_condition (stream
);
15835 fputc('s', stream
);
15838 /* %# is a "break" sequence. It doesn't output anything, but is used to
15839 separate e.g. operand numbers from following text, if that text consists
15840 of further digits which we don't want to be part of the operand
15848 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
15849 r
= real_value_negate (&r
);
15850 fprintf (stream
, "%s", fp_const_from_val (&r
));
15854 /* An integer or symbol address without a preceding # sign. */
15856 switch (GET_CODE (x
))
15859 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
15863 output_addr_const (stream
, x
);
15867 gcc_unreachable ();
15872 if (GET_CODE (x
) == CONST_INT
)
15875 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
15876 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
15880 putc ('~', stream
);
15881 output_addr_const (stream
, x
);
15886 /* The low 16 bits of an immediate constant. */
15887 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
15891 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
15894 /* Truncate Cirrus shift counts. */
15896 if (GET_CODE (x
) == CONST_INT
)
15898 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 0x3f);
15901 arm_print_operand (stream
, x
, 0);
15905 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
15913 if (!shift_operator (x
, SImode
))
15915 output_operand_lossage ("invalid shift operand");
15919 shift
= shift_op (x
, &val
);
15923 fprintf (stream
, ", %s ", shift
);
15925 arm_print_operand (stream
, XEXP (x
, 1), 0);
15927 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
15932 /* An explanation of the 'Q', 'R' and 'H' register operands:
15934 In a pair of registers containing a DI or DF value the 'Q'
15935 operand returns the register number of the register containing
15936 the least significant part of the value. The 'R' operand returns
15937 the register number of the register containing the most
15938 significant part of the value.
15940 The 'H' operand returns the higher of the two register numbers.
15941 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
15942 same as the 'Q' operand, since the most significant part of the
15943 value is held in the lower number register. The reverse is true
15944 on systems where WORDS_BIG_ENDIAN is false.
15946 The purpose of these operands is to distinguish between cases
15947 where the endian-ness of the values is important (for example
15948 when they are added together), and cases where the endian-ness
15949 is irrelevant, but the order of register operations is important.
15950 For example when loading a value from memory into a register
15951 pair, the endian-ness does not matter. Provided that the value
15952 from the lower memory address is put into the lower numbered
15953 register, and the value from the higher address is put into the
15954 higher numbered register, the load will work regardless of whether
15955 the value being loaded is big-wordian or little-wordian. The
15956 order of the two register loads can matter however, if the address
15957 of the memory location is actually held in one of the registers
15958 being overwritten by the load.
15960 The 'Q' and 'R' constraints are also available for 64-bit
15963 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
15965 rtx part
= gen_lowpart (SImode
, x
);
15966 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
15970 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15972 output_operand_lossage ("invalid operand for code '%c'", code
);
15976 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
15980 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
15982 enum machine_mode mode
= GET_MODE (x
);
15985 if (mode
== VOIDmode
)
15987 part
= gen_highpart_mode (SImode
, mode
, x
);
15988 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
15992 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
15994 output_operand_lossage ("invalid operand for code '%c'", code
);
15998 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
16002 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16004 output_operand_lossage ("invalid operand for code '%c'", code
);
16008 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
16012 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16014 output_operand_lossage ("invalid operand for code '%c'", code
);
16018 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
16022 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16024 output_operand_lossage ("invalid operand for code '%c'", code
);
16028 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
16032 asm_fprintf (stream
, "%r",
16033 GET_CODE (XEXP (x
, 0)) == REG
16034 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
16038 asm_fprintf (stream
, "{%r-%r}",
16040 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
16043 /* Like 'M', but writing doubleword vector registers, for use by Neon
16047 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
16048 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
16050 asm_fprintf (stream
, "{d%d}", regno
);
16052 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
16057 /* CONST_TRUE_RTX means always -- that's the default. */
16058 if (x
== const_true_rtx
)
16061 if (!COMPARISON_P (x
))
16063 output_operand_lossage ("invalid operand for code '%c'", code
);
16067 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
16072 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16073 want to do that. */
16074 if (x
== const_true_rtx
)
16076 output_operand_lossage ("instruction never executed");
16079 if (!COMPARISON_P (x
))
16081 output_operand_lossage ("invalid operand for code '%c'", code
);
16085 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
16086 (get_arm_condition_code (x
))],
16090 /* Cirrus registers can be accessed in a variety of ways:
16091 single floating point (f)
16092 double floating point (d)
16094 64bit integer (dx). */
16095 case 'W': /* Cirrus register in F mode. */
16096 case 'X': /* Cirrus register in D mode. */
16097 case 'Y': /* Cirrus register in FX mode. */
16098 case 'Z': /* Cirrus register in DX mode. */
16099 gcc_assert (GET_CODE (x
) == REG
16100 && REGNO_REG_CLASS (REGNO (x
)) == CIRRUS_REGS
);
16102 fprintf (stream
, "mv%s%s",
16104 : code
== 'X' ? "d"
16105 : code
== 'Y' ? "fx" : "dx", reg_names
[REGNO (x
)] + 2);
16109 /* Print cirrus register in the mode specified by the register's mode. */
16112 int mode
= GET_MODE (x
);
16114 if (GET_CODE (x
) != REG
|| REGNO_REG_CLASS (REGNO (x
)) != CIRRUS_REGS
)
16116 output_operand_lossage ("invalid operand for code '%c'", code
);
16120 fprintf (stream
, "mv%s%s",
16121 mode
== DFmode
? "d"
16122 : mode
== SImode
? "fx"
16123 : mode
== DImode
? "dx"
16124 : "f", reg_names
[REGNO (x
)] + 2);
16130 if (GET_CODE (x
) != REG
16131 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
16132 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
16133 /* Bad value for wCG register number. */
16135 output_operand_lossage ("invalid operand for code '%c'", code
);
16140 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
16143 /* Print an iWMMXt control register name. */
16145 if (GET_CODE (x
) != CONST_INT
16147 || INTVAL (x
) >= 16)
16148 /* Bad value for wC register number. */
16150 output_operand_lossage ("invalid operand for code '%c'", code
);
16156 static const char * wc_reg_names
[16] =
16158 "wCID", "wCon", "wCSSF", "wCASF",
16159 "wC4", "wC5", "wC6", "wC7",
16160 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16161 "wC12", "wC13", "wC14", "wC15"
16164 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
16168 /* Print the high single-precision register of a VFP double-precision
16172 int mode
= GET_MODE (x
);
16175 if (GET_MODE_SIZE (mode
) != 8 || GET_CODE (x
) != REG
)
16177 output_operand_lossage ("invalid operand for code '%c'", code
);
16182 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
16184 output_operand_lossage ("invalid operand for code '%c'", code
);
16188 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
16192 /* Print a VFP/Neon double precision or quad precision register name. */
16196 int mode
= GET_MODE (x
);
16197 int is_quad
= (code
== 'q');
16200 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
16202 output_operand_lossage ("invalid operand for code '%c'", code
);
16206 if (GET_CODE (x
) != REG
16207 || !IS_VFP_REGNUM (REGNO (x
)))
16209 output_operand_lossage ("invalid operand for code '%c'", code
);
16214 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
16215 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
16217 output_operand_lossage ("invalid operand for code '%c'", code
);
16221 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
16222 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
16226 /* These two codes print the low/high doubleword register of a Neon quad
16227 register, respectively. For pair-structure types, can also print
16228 low/high quadword registers. */
16232 int mode
= GET_MODE (x
);
16235 if ((GET_MODE_SIZE (mode
) != 16
16236 && GET_MODE_SIZE (mode
) != 32) || GET_CODE (x
) != REG
)
16238 output_operand_lossage ("invalid operand for code '%c'", code
);
16243 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
16245 output_operand_lossage ("invalid operand for code '%c'", code
);
16249 if (GET_MODE_SIZE (mode
) == 16)
16250 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
16251 + (code
== 'f' ? 1 : 0));
16253 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
16254 + (code
== 'f' ? 1 : 0));
16258 /* Print a VFPv3 floating-point constant, represented as an integer
16262 int index
= vfp3_const_double_index (x
);
16263 gcc_assert (index
!= -1);
16264 fprintf (stream
, "%d", index
);
16268 /* Print bits representing opcode features for Neon.
16270 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16271 and polynomials as unsigned.
16273 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16275 Bit 2 is 1 for rounding functions, 0 otherwise. */
16277 /* Identify the type as 's', 'u', 'p' or 'f'. */
16280 HOST_WIDE_INT bits
= INTVAL (x
);
16281 fputc ("uspf"[bits
& 3], stream
);
16285 /* Likewise, but signed and unsigned integers are both 'i'. */
16288 HOST_WIDE_INT bits
= INTVAL (x
);
16289 fputc ("iipf"[bits
& 3], stream
);
16293 /* As for 'T', but emit 'u' instead of 'p'. */
16296 HOST_WIDE_INT bits
= INTVAL (x
);
16297 fputc ("usuf"[bits
& 3], stream
);
16301 /* Bit 2: rounding (vs none). */
16304 HOST_WIDE_INT bits
= INTVAL (x
);
16305 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
16309 /* Memory operand for vld1/vst1 instruction. */
16313 bool postinc
= FALSE
;
16314 gcc_assert (GET_CODE (x
) == MEM
);
16315 addr
= XEXP (x
, 0);
16316 if (GET_CODE (addr
) == POST_INC
)
16319 addr
= XEXP (addr
, 0);
16321 asm_fprintf (stream
, "[%r]", REGNO (addr
));
16323 fputs("!", stream
);
16331 gcc_assert (GET_CODE (x
) == MEM
);
16332 addr
= XEXP (x
, 0);
16333 gcc_assert (GET_CODE (addr
) == REG
);
16334 asm_fprintf (stream
, "[%r]", REGNO (addr
));
16338 /* Translate an S register number into a D register number and element index. */
16341 int mode
= GET_MODE (x
);
16344 if (GET_MODE_SIZE (mode
) != 4 || GET_CODE (x
) != REG
)
16346 output_operand_lossage ("invalid operand for code '%c'", code
);
16351 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16353 output_operand_lossage ("invalid operand for code '%c'", code
);
16357 regno
= regno
- FIRST_VFP_REGNUM
;
16358 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
16362 /* Register specifier for vld1.16/vst1.16. Translate the S register
16363 number into a D register number and element index. */
16366 int mode
= GET_MODE (x
);
16369 if (GET_MODE_SIZE (mode
) != 2 || GET_CODE (x
) != REG
)
16371 output_operand_lossage ("invalid operand for code '%c'", code
);
16376 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16378 output_operand_lossage ("invalid operand for code '%c'", code
);
16382 regno
= regno
- FIRST_VFP_REGNUM
;
16383 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
16390 output_operand_lossage ("missing operand");
16394 switch (GET_CODE (x
))
16397 asm_fprintf (stream
, "%r", REGNO (x
));
16401 output_memory_reference_mode
= GET_MODE (x
);
16402 output_address (XEXP (x
, 0));
16409 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
16410 sizeof (fpstr
), 0, 1);
16411 fprintf (stream
, "#%s", fpstr
);
16414 fprintf (stream
, "#%s", fp_immediate_constant (x
));
16418 gcc_assert (GET_CODE (x
) != NEG
);
16419 fputc ('#', stream
);
16420 if (GET_CODE (x
) == HIGH
)
16422 fputs (":lower16:", stream
);
16426 output_addr_const (stream
, x
);
16432 /* Target hook for printing a memory address. */
16434 arm_print_operand_address (FILE *stream
, rtx x
)
16438 int is_minus
= GET_CODE (x
) == MINUS
;
16440 if (GET_CODE (x
) == REG
)
16441 asm_fprintf (stream
, "[%r, #0]", REGNO (x
));
16442 else if (GET_CODE (x
) == PLUS
|| is_minus
)
16444 rtx base
= XEXP (x
, 0);
16445 rtx index
= XEXP (x
, 1);
16446 HOST_WIDE_INT offset
= 0;
16447 if (GET_CODE (base
) != REG
16448 || (GET_CODE (index
) == REG
&& REGNO (index
) == SP_REGNUM
))
16450 /* Ensure that BASE is a register. */
16451 /* (one of them must be). */
16452 /* Also ensure the SP is not used as in index register. */
16457 switch (GET_CODE (index
))
16460 offset
= INTVAL (index
);
16463 asm_fprintf (stream
, "[%r, #%wd]",
16464 REGNO (base
), offset
);
16468 asm_fprintf (stream
, "[%r, %s%r]",
16469 REGNO (base
), is_minus
? "-" : "",
16479 asm_fprintf (stream
, "[%r, %s%r",
16480 REGNO (base
), is_minus
? "-" : "",
16481 REGNO (XEXP (index
, 0)));
16482 arm_print_operand (stream
, index
, 'S');
16483 fputs ("]", stream
);
16488 gcc_unreachable ();
16491 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
16492 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
16494 extern enum machine_mode output_memory_reference_mode
;
16496 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
16498 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
16499 asm_fprintf (stream
, "[%r, #%s%d]!",
16500 REGNO (XEXP (x
, 0)),
16501 GET_CODE (x
) == PRE_DEC
? "-" : "",
16502 GET_MODE_SIZE (output_memory_reference_mode
));
16504 asm_fprintf (stream
, "[%r], #%s%d",
16505 REGNO (XEXP (x
, 0)),
16506 GET_CODE (x
) == POST_DEC
? "-" : "",
16507 GET_MODE_SIZE (output_memory_reference_mode
));
16509 else if (GET_CODE (x
) == PRE_MODIFY
)
16511 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
16512 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
16513 asm_fprintf (stream
, "#%wd]!",
16514 INTVAL (XEXP (XEXP (x
, 1), 1)));
16516 asm_fprintf (stream
, "%r]!",
16517 REGNO (XEXP (XEXP (x
, 1), 1)));
16519 else if (GET_CODE (x
) == POST_MODIFY
)
16521 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
16522 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
16523 asm_fprintf (stream
, "#%wd",
16524 INTVAL (XEXP (XEXP (x
, 1), 1)));
16526 asm_fprintf (stream
, "%r",
16527 REGNO (XEXP (XEXP (x
, 1), 1)));
16529 else output_addr_const (stream
, x
);
16533 if (GET_CODE (x
) == REG
)
16534 asm_fprintf (stream
, "[%r]", REGNO (x
));
16535 else if (GET_CODE (x
) == POST_INC
)
16536 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
16537 else if (GET_CODE (x
) == PLUS
)
16539 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
16540 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
16541 asm_fprintf (stream
, "[%r, #%wd]",
16542 REGNO (XEXP (x
, 0)),
16543 INTVAL (XEXP (x
, 1)));
16545 asm_fprintf (stream
, "[%r, %r]",
16546 REGNO (XEXP (x
, 0)),
16547 REGNO (XEXP (x
, 1)));
16550 output_addr_const (stream
, x
);
16554 /* Target hook for indicating whether a punctuation character for
16555 TARGET_PRINT_OPERAND is valid. */
16557 arm_print_operand_punct_valid_p (unsigned char code
)
16559 return (code
== '@' || code
== '|' || code
== '.'
16560 || code
== '(' || code
== ')' || code
== '#'
16561 || (TARGET_32BIT
&& (code
== '?'))
16562 || (TARGET_THUMB2
&& (code
== '!'))
16563 || (TARGET_THUMB
&& (code
== '_')));
16566 /* Target hook for assembling integer objects. The ARM version needs to
16567 handle word-sized values specially. */
16569 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
16571 enum machine_mode mode
;
16573 if (size
== UNITS_PER_WORD
&& aligned_p
)
16575 fputs ("\t.word\t", asm_out_file
);
16576 output_addr_const (asm_out_file
, x
);
16578 /* Mark symbols as position independent. We only do this in the
16579 .text segment, not in the .data segment. */
16580 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
16581 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
16583 /* See legitimize_pic_address for an explanation of the
16584 TARGET_VXWORKS_RTP check. */
16585 if (TARGET_VXWORKS_RTP
16586 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
16587 fputs ("(GOT)", asm_out_file
);
16589 fputs ("(GOTOFF)", asm_out_file
);
16591 fputc ('\n', asm_out_file
);
16595 mode
= GET_MODE (x
);
16597 if (arm_vector_mode_supported_p (mode
))
16601 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
16603 units
= CONST_VECTOR_NUNITS (x
);
16604 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
16606 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
16607 for (i
= 0; i
< units
; i
++)
16609 rtx elt
= CONST_VECTOR_ELT (x
, i
);
16611 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
16614 for (i
= 0; i
< units
; i
++)
16616 rtx elt
= CONST_VECTOR_ELT (x
, i
);
16617 REAL_VALUE_TYPE rval
;
16619 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
16622 (rval
, GET_MODE_INNER (mode
),
16623 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
16629 return default_assemble_integer (x
, size
, aligned_p
);
16633 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
16637 if (!TARGET_AAPCS_BASED
)
16640 default_named_section_asm_out_constructor
16641 : default_named_section_asm_out_destructor
) (symbol
, priority
);
16645 /* Put these in the .init_array section, using a special relocation. */
16646 if (priority
!= DEFAULT_INIT_PRIORITY
)
16649 sprintf (buf
, "%s.%.5u",
16650 is_ctor
? ".init_array" : ".fini_array",
16652 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
16659 switch_to_section (s
);
16660 assemble_align (POINTER_SIZE
);
16661 fputs ("\t.word\t", asm_out_file
);
16662 output_addr_const (asm_out_file
, symbol
);
16663 fputs ("(target1)\n", asm_out_file
);
16666 /* Add a function to the list of static constructors. */
16669 arm_elf_asm_constructor (rtx symbol
, int priority
)
16671 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
16674 /* Add a function to the list of static destructors. */
16677 arm_elf_asm_destructor (rtx symbol
, int priority
)
16679 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
16682 /* A finite state machine takes care of noticing whether or not instructions
16683 can be conditionally executed, and thus decrease execution time and code
16684 size by deleting branch instructions. The fsm is controlled by
16685 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
16687 /* The state of the fsm controlling condition codes are:
16688 0: normal, do nothing special
16689 1: make ASM_OUTPUT_OPCODE not output this instruction
16690 2: make ASM_OUTPUT_OPCODE not output this instruction
16691 3: make instructions conditional
16692 4: make instructions conditional
16694 State transitions (state->state by whom under condition):
16695 0 -> 1 final_prescan_insn if the `target' is a label
16696 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
16697 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
16698 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
16699 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
16700 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
16701 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
16702 (the target insn is arm_target_insn).
16704 If the jump clobbers the conditions then we use states 2 and 4.
16706 A similar thing can be done with conditional return insns.
16708 XXX In case the `target' is an unconditional branch, this conditionalising
16709 of the instructions always reduces code size, but not always execution
16710 time. But then, I want to reduce the code size to somewhere near what
16711 /bin/cc produces. */
16713 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
16714 instructions. When a COND_EXEC instruction is seen the subsequent
16715 instructions are scanned so that multiple conditional instructions can be
16716 combined into a single IT block. arm_condexec_count and arm_condexec_mask
16717 specify the length and true/false mask for the IT block. These will be
16718 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
16720 /* Returns the index of the ARM condition code string in
16721 `arm_condition_codes'. COMPARISON should be an rtx like
16722 `(eq (...) (...))'. */
16723 static enum arm_cond_code
16724 get_arm_condition_code (rtx comparison
)
16726 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
16727 enum arm_cond_code code
;
16728 enum rtx_code comp_code
= GET_CODE (comparison
);
16730 if (GET_MODE_CLASS (mode
) != MODE_CC
)
16731 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
16732 XEXP (comparison
, 1));
16736 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
16737 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
16738 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
16739 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
16740 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
16741 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
16742 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
16743 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
16744 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
16745 case CC_DLTUmode
: code
= ARM_CC
;
16748 gcc_assert (comp_code
== EQ
|| comp_code
== NE
);
16750 if (comp_code
== EQ
)
16751 return ARM_INVERSE_CONDITION_CODE (code
);
16757 case NE
: return ARM_NE
;
16758 case EQ
: return ARM_EQ
;
16759 case GE
: return ARM_PL
;
16760 case LT
: return ARM_MI
;
16761 default: gcc_unreachable ();
16767 case NE
: return ARM_NE
;
16768 case EQ
: return ARM_EQ
;
16769 default: gcc_unreachable ();
16775 case NE
: return ARM_MI
;
16776 case EQ
: return ARM_PL
;
16777 default: gcc_unreachable ();
16782 /* These encodings assume that AC=1 in the FPA system control
16783 byte. This allows us to handle all cases except UNEQ and
16787 case GE
: return ARM_GE
;
16788 case GT
: return ARM_GT
;
16789 case LE
: return ARM_LS
;
16790 case LT
: return ARM_MI
;
16791 case NE
: return ARM_NE
;
16792 case EQ
: return ARM_EQ
;
16793 case ORDERED
: return ARM_VC
;
16794 case UNORDERED
: return ARM_VS
;
16795 case UNLT
: return ARM_LT
;
16796 case UNLE
: return ARM_LE
;
16797 case UNGT
: return ARM_HI
;
16798 case UNGE
: return ARM_PL
;
16799 /* UNEQ and LTGT do not have a representation. */
16800 case UNEQ
: /* Fall through. */
16801 case LTGT
: /* Fall through. */
16802 default: gcc_unreachable ();
16808 case NE
: return ARM_NE
;
16809 case EQ
: return ARM_EQ
;
16810 case GE
: return ARM_LE
;
16811 case GT
: return ARM_LT
;
16812 case LE
: return ARM_GE
;
16813 case LT
: return ARM_GT
;
16814 case GEU
: return ARM_LS
;
16815 case GTU
: return ARM_CC
;
16816 case LEU
: return ARM_CS
;
16817 case LTU
: return ARM_HI
;
16818 default: gcc_unreachable ();
16824 case LTU
: return ARM_CS
;
16825 case GEU
: return ARM_CC
;
16826 default: gcc_unreachable ();
16832 case NE
: return ARM_NE
;
16833 case EQ
: return ARM_EQ
;
16834 case GEU
: return ARM_CS
;
16835 case GTU
: return ARM_HI
;
16836 case LEU
: return ARM_LS
;
16837 case LTU
: return ARM_CC
;
16838 default: gcc_unreachable ();
16844 case GE
: return ARM_GE
;
16845 case LT
: return ARM_LT
;
16846 case GEU
: return ARM_CS
;
16847 case LTU
: return ARM_CC
;
16848 default: gcc_unreachable ();
16854 case NE
: return ARM_NE
;
16855 case EQ
: return ARM_EQ
;
16856 case GE
: return ARM_GE
;
16857 case GT
: return ARM_GT
;
16858 case LE
: return ARM_LE
;
16859 case LT
: return ARM_LT
;
16860 case GEU
: return ARM_CS
;
16861 case GTU
: return ARM_HI
;
16862 case LEU
: return ARM_LS
;
16863 case LTU
: return ARM_CC
;
16864 default: gcc_unreachable ();
16867 default: gcc_unreachable ();
16871 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
16874 thumb2_final_prescan_insn (rtx insn
)
16876 rtx first_insn
= insn
;
16877 rtx body
= PATTERN (insn
);
16879 enum arm_cond_code code
;
16883 /* Remove the previous insn from the count of insns to be output. */
16884 if (arm_condexec_count
)
16885 arm_condexec_count
--;
16887 /* Nothing to do if we are already inside a conditional block. */
16888 if (arm_condexec_count
)
16891 if (GET_CODE (body
) != COND_EXEC
)
16894 /* Conditional jumps are implemented directly. */
16895 if (GET_CODE (insn
) == JUMP_INSN
)
16898 predicate
= COND_EXEC_TEST (body
);
16899 arm_current_cc
= get_arm_condition_code (predicate
);
16901 n
= get_attr_ce_count (insn
);
16902 arm_condexec_count
= 1;
16903 arm_condexec_mask
= (1 << n
) - 1;
16904 arm_condexec_masklen
= n
;
16905 /* See if subsequent instructions can be combined into the same block. */
16908 insn
= next_nonnote_insn (insn
);
16910 /* Jumping into the middle of an IT block is illegal, so a label or
16911 barrier terminates the block. */
16912 if (GET_CODE (insn
) != INSN
&& GET_CODE(insn
) != JUMP_INSN
)
16915 body
= PATTERN (insn
);
16916 /* USE and CLOBBER aren't really insns, so just skip them. */
16917 if (GET_CODE (body
) == USE
16918 || GET_CODE (body
) == CLOBBER
)
16921 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
16922 if (GET_CODE (body
) != COND_EXEC
)
16924 /* Allow up to 4 conditionally executed instructions in a block. */
16925 n
= get_attr_ce_count (insn
);
16926 if (arm_condexec_masklen
+ n
> 4)
16929 predicate
= COND_EXEC_TEST (body
);
16930 code
= get_arm_condition_code (predicate
);
16931 mask
= (1 << n
) - 1;
16932 if (arm_current_cc
== code
)
16933 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
16934 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
16937 arm_condexec_count
++;
16938 arm_condexec_masklen
+= n
;
16940 /* A jump must be the last instruction in a conditional block. */
16941 if (GET_CODE(insn
) == JUMP_INSN
)
16944 /* Restore recog_data (getting the attributes of other insns can
16945 destroy this array, but final.c assumes that it remains intact
16946 across this call). */
16947 extract_constrain_insn_cached (first_insn
);
16951 arm_final_prescan_insn (rtx insn
)
16953 /* BODY will hold the body of INSN. */
16954 rtx body
= PATTERN (insn
);
16956 /* This will be 1 if trying to repeat the trick, and things need to be
16957 reversed if it appears to fail. */
16960 /* If we start with a return insn, we only succeed if we find another one. */
16961 int seeking_return
= 0;
16963 /* START_INSN will hold the insn from where we start looking. This is the
16964 first insn after the following code_label if REVERSE is true. */
16965 rtx start_insn
= insn
;
16967 /* If in state 4, check if the target branch is reached, in order to
16968 change back to state 0. */
16969 if (arm_ccfsm_state
== 4)
16971 if (insn
== arm_target_insn
)
16973 arm_target_insn
= NULL
;
16974 arm_ccfsm_state
= 0;
16979 /* If in state 3, it is possible to repeat the trick, if this insn is an
16980 unconditional branch to a label, and immediately following this branch
16981 is the previous target label which is only used once, and the label this
16982 branch jumps to is not too far off. */
16983 if (arm_ccfsm_state
== 3)
16985 if (simplejump_p (insn
))
16987 start_insn
= next_nonnote_insn (start_insn
);
16988 if (GET_CODE (start_insn
) == BARRIER
)
16990 /* XXX Isn't this always a barrier? */
16991 start_insn
= next_nonnote_insn (start_insn
);
16993 if (GET_CODE (start_insn
) == CODE_LABEL
16994 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
16995 && LABEL_NUSES (start_insn
) == 1)
17000 else if (GET_CODE (body
) == RETURN
)
17002 start_insn
= next_nonnote_insn (start_insn
);
17003 if (GET_CODE (start_insn
) == BARRIER
)
17004 start_insn
= next_nonnote_insn (start_insn
);
17005 if (GET_CODE (start_insn
) == CODE_LABEL
17006 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17007 && LABEL_NUSES (start_insn
) == 1)
17010 seeking_return
= 1;
17019 gcc_assert (!arm_ccfsm_state
|| reverse
);
17020 if (GET_CODE (insn
) != JUMP_INSN
)
17023 /* This jump might be paralleled with a clobber of the condition codes
17024 the jump should always come first */
17025 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
17026 body
= XVECEXP (body
, 0, 0);
17029 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
17030 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
17033 int fail
= FALSE
, succeed
= FALSE
;
17034 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17035 int then_not_else
= TRUE
;
17036 rtx this_insn
= start_insn
, label
= 0;
17038 /* Register the insn jumped to. */
17041 if (!seeking_return
)
17042 label
= XEXP (SET_SRC (body
), 0);
17044 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
17045 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
17046 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
17048 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
17049 then_not_else
= FALSE
;
17051 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == RETURN
)
17052 seeking_return
= 1;
17053 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == RETURN
)
17055 seeking_return
= 1;
17056 then_not_else
= FALSE
;
17059 gcc_unreachable ();
17061 /* See how many insns this branch skips, and what kind of insns. If all
17062 insns are okay, and the label or unconditional branch to the same
17063 label is not too far away, succeed. */
17064 for (insns_skipped
= 0;
17065 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
17069 this_insn
= next_nonnote_insn (this_insn
);
17073 switch (GET_CODE (this_insn
))
17076 /* Succeed if it is the target label, otherwise fail since
17077 control falls in from somewhere else. */
17078 if (this_insn
== label
)
17080 arm_ccfsm_state
= 1;
17088 /* Succeed if the following insn is the target label.
17090 If return insns are used then the last insn in a function
17091 will be a barrier. */
17092 this_insn
= next_nonnote_insn (this_insn
);
17093 if (this_insn
&& this_insn
== label
)
17095 arm_ccfsm_state
= 1;
17103 /* The AAPCS says that conditional calls should not be
17104 used since they make interworking inefficient (the
17105 linker can't transform BL<cond> into BLX). That's
17106 only a problem if the machine has BLX. */
17113 /* Succeed if the following insn is the target label, or
17114 if the following two insns are a barrier and the
17116 this_insn
= next_nonnote_insn (this_insn
);
17117 if (this_insn
&& GET_CODE (this_insn
) == BARRIER
)
17118 this_insn
= next_nonnote_insn (this_insn
);
17120 if (this_insn
&& this_insn
== label
17121 && insns_skipped
< max_insns_skipped
)
17123 arm_ccfsm_state
= 1;
17131 /* If this is an unconditional branch to the same label, succeed.
17132 If it is to another label, do nothing. If it is conditional,
17134 /* XXX Probably, the tests for SET and the PC are
17137 scanbody
= PATTERN (this_insn
);
17138 if (GET_CODE (scanbody
) == SET
17139 && GET_CODE (SET_DEST (scanbody
)) == PC
)
17141 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
17142 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
17144 arm_ccfsm_state
= 2;
17147 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
17150 /* Fail if a conditional return is undesirable (e.g. on a
17151 StrongARM), but still allow this if optimizing for size. */
17152 else if (GET_CODE (scanbody
) == RETURN
17153 && !use_return_insn (TRUE
, NULL
)
17156 else if (GET_CODE (scanbody
) == RETURN
17159 arm_ccfsm_state
= 2;
17162 else if (GET_CODE (scanbody
) == PARALLEL
)
17164 switch (get_attr_conds (this_insn
))
17174 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
17179 /* Instructions using or affecting the condition codes make it
17181 scanbody
= PATTERN (this_insn
);
17182 if (!(GET_CODE (scanbody
) == SET
17183 || GET_CODE (scanbody
) == PARALLEL
)
17184 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
17187 /* A conditional cirrus instruction must be followed by
17188 a non Cirrus instruction. However, since we
17189 conditionalize instructions in this function and by
17190 the time we get here we can't add instructions
17191 (nops), because shorten_branches() has already been
17192 called, we will disable conditionalizing Cirrus
17193 instructions to be safe. */
17194 if (GET_CODE (scanbody
) != USE
17195 && GET_CODE (scanbody
) != CLOBBER
17196 && get_attr_cirrus (this_insn
) != CIRRUS_NOT
)
17206 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
17207 arm_target_label
= CODE_LABEL_NUMBER (label
);
17210 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
17212 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
17214 this_insn
= next_nonnote_insn (this_insn
);
17215 gcc_assert (!this_insn
17216 || (GET_CODE (this_insn
) != BARRIER
17217 && GET_CODE (this_insn
) != CODE_LABEL
));
17221 /* Oh, dear! we ran off the end.. give up. */
17222 extract_constrain_insn_cached (insn
);
17223 arm_ccfsm_state
= 0;
17224 arm_target_insn
= NULL
;
17227 arm_target_insn
= this_insn
;
17230 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17233 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
17235 if (reverse
|| then_not_else
)
17236 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
17239 /* Restore recog_data (getting the attributes of other insns can
17240 destroy this array, but final.c assumes that it remains intact
17241 across this call. */
17242 extract_constrain_insn_cached (insn
);
17246 /* Output IT instructions. */
17248 thumb2_asm_output_opcode (FILE * stream
)
17253 if (arm_condexec_mask
)
17255 for (n
= 0; n
< arm_condexec_masklen
; n
++)
17256 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
17258 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
17259 arm_condition_codes
[arm_current_cc
]);
17260 arm_condexec_mask
= 0;
17264 /* Returns true if REGNO is a valid register
17265 for holding a quantity of type MODE. */
17267 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
17269 if (GET_MODE_CLASS (mode
) == MODE_CC
)
17270 return (regno
== CC_REGNUM
17271 || (TARGET_HARD_FLOAT
&& TARGET_VFP
17272 && regno
== VFPCC_REGNUM
));
17275 /* For the Thumb we only allow values bigger than SImode in
17276 registers 0 - 6, so that there is always a second low
17277 register available to hold the upper part of the value.
17278 We probably we ought to ensure that the register is the
17279 start of an even numbered register pair. */
17280 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
17282 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
17283 && IS_CIRRUS_REGNUM (regno
))
17284 /* We have outlawed SI values in Cirrus registers because they
17285 reside in the lower 32 bits, but SF values reside in the
17286 upper 32 bits. This causes gcc all sorts of grief. We can't
17287 even split the registers into pairs because Cirrus SI values
17288 get sign extended to 64bits-- aldyh. */
17289 return (GET_MODE_CLASS (mode
) == MODE_FLOAT
) || (mode
== DImode
);
17291 if (TARGET_HARD_FLOAT
&& TARGET_VFP
17292 && IS_VFP_REGNUM (regno
))
17294 if (mode
== SFmode
|| mode
== SImode
)
17295 return VFP_REGNO_OK_FOR_SINGLE (regno
);
17297 if (mode
== DFmode
)
17298 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
17300 /* VFP registers can hold HFmode values, but there is no point in
17301 putting them there unless we have hardware conversion insns. */
17302 if (mode
== HFmode
)
17303 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
17306 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
17307 || (VALID_NEON_QREG_MODE (mode
)
17308 && NEON_REGNO_OK_FOR_QUAD (regno
))
17309 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
17310 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
17311 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
17312 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
17313 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
17318 if (TARGET_REALLY_IWMMXT
)
17320 if (IS_IWMMXT_GR_REGNUM (regno
))
17321 return mode
== SImode
;
17323 if (IS_IWMMXT_REGNUM (regno
))
17324 return VALID_IWMMXT_REG_MODE (mode
);
17327 /* We allow almost any value to be stored in the general registers.
17328 Restrict doubleword quantities to even register pairs so that we can
17329 use ldrd. Do not allow very large Neon structure opaque modes in
17330 general registers; they would use too many. */
17331 if (regno
<= LAST_ARM_REGNUM
)
17332 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
17333 && ARM_NUM_REGS (mode
) <= 4;
17335 if (regno
== FRAME_POINTER_REGNUM
17336 || regno
== ARG_POINTER_REGNUM
)
17337 /* We only allow integers in the fake hard registers. */
17338 return GET_MODE_CLASS (mode
) == MODE_INT
;
17340 /* The only registers left are the FPA registers
17341 which we only allow to hold FP values. */
17342 return (TARGET_HARD_FLOAT
&& TARGET_FPA
17343 && GET_MODE_CLASS (mode
) == MODE_FLOAT
17344 && regno
>= FIRST_FPA_REGNUM
17345 && regno
<= LAST_FPA_REGNUM
);
17348 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17349 not used in arm mode. */
17352 arm_regno_class (int regno
)
17356 if (regno
== STACK_POINTER_REGNUM
)
17358 if (regno
== CC_REGNUM
)
17365 if (TARGET_THUMB2
&& regno
< 8)
17368 if ( regno
<= LAST_ARM_REGNUM
17369 || regno
== FRAME_POINTER_REGNUM
17370 || regno
== ARG_POINTER_REGNUM
)
17371 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
17373 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
17374 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
17376 if (IS_CIRRUS_REGNUM (regno
))
17377 return CIRRUS_REGS
;
17379 if (IS_VFP_REGNUM (regno
))
17381 if (regno
<= D7_VFP_REGNUM
)
17382 return VFP_D0_D7_REGS
;
17383 else if (regno
<= LAST_LO_VFP_REGNUM
)
17384 return VFP_LO_REGS
;
17386 return VFP_HI_REGS
;
17389 if (IS_IWMMXT_REGNUM (regno
))
17390 return IWMMXT_REGS
;
17392 if (IS_IWMMXT_GR_REGNUM (regno
))
17393 return IWMMXT_GR_REGS
;
17398 /* Handle a special case when computing the offset
17399 of an argument from the frame pointer. */
17401 arm_debugger_arg_offset (int value
, rtx addr
)
17405 /* We are only interested if dbxout_parms() failed to compute the offset. */
17409 /* We can only cope with the case where the address is held in a register. */
17410 if (GET_CODE (addr
) != REG
)
17413 /* If we are using the frame pointer to point at the argument, then
17414 an offset of 0 is correct. */
17415 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
17418 /* If we are using the stack pointer to point at the
17419 argument, then an offset of 0 is correct. */
17420 /* ??? Check this is consistent with thumb2 frame layout. */
17421 if ((TARGET_THUMB
|| !frame_pointer_needed
)
17422 && REGNO (addr
) == SP_REGNUM
)
17425 /* Oh dear. The argument is pointed to by a register rather
17426 than being held in a register, or being stored at a known
17427 offset from the frame pointer. Since GDB only understands
17428 those two kinds of argument we must translate the address
17429 held in the register into an offset from the frame pointer.
17430 We do this by searching through the insns for the function
17431 looking to see where this register gets its value. If the
17432 register is initialized from the frame pointer plus an offset
17433 then we are in luck and we can continue, otherwise we give up.
17435 This code is exercised by producing debugging information
17436 for a function with arguments like this:
17438 double func (double a, double b, int c, double d) {return d;}
17440 Without this code the stab for parameter 'd' will be set to
17441 an offset of 0 from the frame pointer, rather than 8. */
17443 /* The if() statement says:
17445 If the insn is a normal instruction
17446 and if the insn is setting the value in a register
17447 and if the register being set is the register holding the address of the argument
17448 and if the address is computing by an addition
17449 that involves adding to a register
17450 which is the frame pointer
17455 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
17457 if ( GET_CODE (insn
) == INSN
17458 && GET_CODE (PATTERN (insn
)) == SET
17459 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
17460 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
17461 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 0)) == REG
17462 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
17463 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 1)) == CONST_INT
17466 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
17475 warning (0, "unable to compute real location of stacked parameter");
17476 value
= 8; /* XXX magic hack */
/* Register builtin NAME with signature TYPE and function code CODE,
   but only when the MASK architecture-flag bit is enabled in insn_flags.
   NOTE(review): the do/while(0) wrapper was dropped by the extraction
   and has been restored.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	add_builtin_function ((NAME), (TYPE), (CODE),			\
			      BUILT_IN_MD, NULL, NULL_TREE);		\
    }									\
  while (0)
17491 struct builtin_description
17493 const unsigned int mask
;
17494 const enum insn_code icode
;
17495 const char * const name
;
17496 const enum arm_builtins code
;
17497 const enum rtx_code comparison
;
17498 const unsigned int flag
;
17501 static const struct builtin_description bdesc_2arg
[] =
17503 #define IWMMXT_BUILTIN(code, string, builtin) \
17504 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
17505 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17507 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
17508 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
17509 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
17510 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
17511 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
17512 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
17513 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
17514 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
17515 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
17516 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
17517 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
17518 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
17519 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
17520 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
17521 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
17522 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
17523 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
17524 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
17525 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
17526 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
17527 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
17528 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
17529 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
17530 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
17531 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
17532 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
17533 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
17534 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
17535 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
17536 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
17537 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
17538 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
17539 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
17540 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
17541 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
17542 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
17543 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
17544 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
17545 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
17546 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
17547 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
17548 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
17549 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
17550 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
17551 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
17552 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
17553 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
17554 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
17555 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
17556 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
17557 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
17558 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
17559 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
17560 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
17561 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
17562 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
17563 IWMMXT_BUILTIN (iwmmxt_wmadds
, "wmadds", WMADDS
)
17564 IWMMXT_BUILTIN (iwmmxt_wmaddu
, "wmaddu", WMADDU
)
17566 #define IWMMXT_BUILTIN2(code, builtin) \
17567 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
17569 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
17570 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
17571 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
17572 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
17573 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
17574 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
17575 IWMMXT_BUILTIN2 (ashlv4hi3_di
, WSLLH
)
17576 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt
, WSLLHI
)
17577 IWMMXT_BUILTIN2 (ashlv2si3_di
, WSLLW
)
17578 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt
, WSLLWI
)
17579 IWMMXT_BUILTIN2 (ashldi3_di
, WSLLD
)
17580 IWMMXT_BUILTIN2 (ashldi3_iwmmxt
, WSLLDI
)
17581 IWMMXT_BUILTIN2 (lshrv4hi3_di
, WSRLH
)
17582 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt
, WSRLHI
)
17583 IWMMXT_BUILTIN2 (lshrv2si3_di
, WSRLW
)
17584 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt
, WSRLWI
)
17585 IWMMXT_BUILTIN2 (lshrdi3_di
, WSRLD
)
17586 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt
, WSRLDI
)
17587 IWMMXT_BUILTIN2 (ashrv4hi3_di
, WSRAH
)
17588 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt
, WSRAHI
)
17589 IWMMXT_BUILTIN2 (ashrv2si3_di
, WSRAW
)
17590 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt
, WSRAWI
)
17591 IWMMXT_BUILTIN2 (ashrdi3_di
, WSRAD
)
17592 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt
, WSRADI
)
17593 IWMMXT_BUILTIN2 (rorv4hi3_di
, WRORH
)
17594 IWMMXT_BUILTIN2 (rorv4hi3
, WRORHI
)
17595 IWMMXT_BUILTIN2 (rorv2si3_di
, WRORW
)
17596 IWMMXT_BUILTIN2 (rorv2si3
, WRORWI
)
17597 IWMMXT_BUILTIN2 (rordi3_di
, WRORD
)
17598 IWMMXT_BUILTIN2 (rordi3
, WRORDI
)
17599 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
17600 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
17603 static const struct builtin_description bdesc_1arg
[] =
17605 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
17606 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
17607 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
17608 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
17609 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
17610 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
17611 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
17612 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
17613 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
17614 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
17615 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
17616 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
17617 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
17618 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
17619 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
17620 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
17621 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
17622 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
17625 /* Set up all the iWMMXt builtins. This is
17626 not called if TARGET_IWMMXT is zero. */
17629 arm_init_iwmmxt_builtins (void)
17631 const struct builtin_description
* d
;
17633 tree endlink
= void_list_node
;
17635 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
17636 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
17637 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
17640 = build_function_type (integer_type_node
,
17641 tree_cons (NULL_TREE
, integer_type_node
, endlink
));
17642 tree v8qi_ftype_v8qi_v8qi_int
17643 = build_function_type (V8QI_type_node
,
17644 tree_cons (NULL_TREE
, V8QI_type_node
,
17645 tree_cons (NULL_TREE
, V8QI_type_node
,
17646 tree_cons (NULL_TREE
,
17649 tree v4hi_ftype_v4hi_int
17650 = build_function_type (V4HI_type_node
,
17651 tree_cons (NULL_TREE
, V4HI_type_node
,
17652 tree_cons (NULL_TREE
, integer_type_node
,
17654 tree v2si_ftype_v2si_int
17655 = build_function_type (V2SI_type_node
,
17656 tree_cons (NULL_TREE
, V2SI_type_node
,
17657 tree_cons (NULL_TREE
, integer_type_node
,
17659 tree v2si_ftype_di_di
17660 = build_function_type (V2SI_type_node
,
17661 tree_cons (NULL_TREE
, long_long_integer_type_node
,
17662 tree_cons (NULL_TREE
, long_long_integer_type_node
,
17664 tree di_ftype_di_int
17665 = build_function_type (long_long_integer_type_node
,
17666 tree_cons (NULL_TREE
, long_long_integer_type_node
,
17667 tree_cons (NULL_TREE
, integer_type_node
,
17669 tree di_ftype_di_int_int
17670 = build_function_type (long_long_integer_type_node
,
17671 tree_cons (NULL_TREE
, long_long_integer_type_node
,
17672 tree_cons (NULL_TREE
, integer_type_node
,
17673 tree_cons (NULL_TREE
,
17676 tree int_ftype_v8qi
17677 = build_function_type (integer_type_node
,
17678 tree_cons (NULL_TREE
, V8QI_type_node
,
17680 tree int_ftype_v4hi
17681 = build_function_type (integer_type_node
,
17682 tree_cons (NULL_TREE
, V4HI_type_node
,
17684 tree int_ftype_v2si
17685 = build_function_type (integer_type_node
,
17686 tree_cons (NULL_TREE
, V2SI_type_node
,
17688 tree int_ftype_v8qi_int
17689 = build_function_type (integer_type_node
,
17690 tree_cons (NULL_TREE
, V8QI_type_node
,
17691 tree_cons (NULL_TREE
, integer_type_node
,
17693 tree int_ftype_v4hi_int
17694 = build_function_type (integer_type_node
,
17695 tree_cons (NULL_TREE
, V4HI_type_node
,
17696 tree_cons (NULL_TREE
, integer_type_node
,
17698 tree int_ftype_v2si_int
17699 = build_function_type (integer_type_node
,
17700 tree_cons (NULL_TREE
, V2SI_type_node
,
17701 tree_cons (NULL_TREE
, integer_type_node
,
17703 tree v8qi_ftype_v8qi_int_int
17704 = build_function_type (V8QI_type_node
,
17705 tree_cons (NULL_TREE
, V8QI_type_node
,
17706 tree_cons (NULL_TREE
, integer_type_node
,
17707 tree_cons (NULL_TREE
,
17710 tree v4hi_ftype_v4hi_int_int
17711 = build_function_type (V4HI_type_node
,
17712 tree_cons (NULL_TREE
, V4HI_type_node
,
17713 tree_cons (NULL_TREE
, integer_type_node
,
17714 tree_cons (NULL_TREE
,
17717 tree v2si_ftype_v2si_int_int
17718 = build_function_type (V2SI_type_node
,
17719 tree_cons (NULL_TREE
, V2SI_type_node
,
17720 tree_cons (NULL_TREE
, integer_type_node
,
17721 tree_cons (NULL_TREE
,
17724 /* Miscellaneous. */
17725 tree v8qi_ftype_v4hi_v4hi
17726 = build_function_type (V8QI_type_node
,
17727 tree_cons (NULL_TREE
, V4HI_type_node
,
17728 tree_cons (NULL_TREE
, V4HI_type_node
,
17730 tree v4hi_ftype_v2si_v2si
17731 = build_function_type (V4HI_type_node
,
17732 tree_cons (NULL_TREE
, V2SI_type_node
,
17733 tree_cons (NULL_TREE
, V2SI_type_node
,
17735 tree v2si_ftype_v4hi_v4hi
17736 = build_function_type (V2SI_type_node
,
17737 tree_cons (NULL_TREE
, V4HI_type_node
,
17738 tree_cons (NULL_TREE
, V4HI_type_node
,
17740 tree v2si_ftype_v8qi_v8qi
17741 = build_function_type (V2SI_type_node
,
17742 tree_cons (NULL_TREE
, V8QI_type_node
,
17743 tree_cons (NULL_TREE
, V8QI_type_node
,
17745 tree v4hi_ftype_v4hi_di
17746 = build_function_type (V4HI_type_node
,
17747 tree_cons (NULL_TREE
, V4HI_type_node
,
17748 tree_cons (NULL_TREE
,
17749 long_long_integer_type_node
,
17751 tree v2si_ftype_v2si_di
17752 = build_function_type (V2SI_type_node
,
17753 tree_cons (NULL_TREE
, V2SI_type_node
,
17754 tree_cons (NULL_TREE
,
17755 long_long_integer_type_node
,
17757 tree void_ftype_int_int
17758 = build_function_type (void_type_node
,
17759 tree_cons (NULL_TREE
, integer_type_node
,
17760 tree_cons (NULL_TREE
, integer_type_node
,
17763 = build_function_type (long_long_unsigned_type_node
, endlink
);
17765 = build_function_type (long_long_integer_type_node
,
17766 tree_cons (NULL_TREE
, V8QI_type_node
,
17769 = build_function_type (long_long_integer_type_node
,
17770 tree_cons (NULL_TREE
, V4HI_type_node
,
17773 = build_function_type (long_long_integer_type_node
,
17774 tree_cons (NULL_TREE
, V2SI_type_node
,
17776 tree v2si_ftype_v4hi
17777 = build_function_type (V2SI_type_node
,
17778 tree_cons (NULL_TREE
, V4HI_type_node
,
17780 tree v4hi_ftype_v8qi
17781 = build_function_type (V4HI_type_node
,
17782 tree_cons (NULL_TREE
, V8QI_type_node
,
17785 tree di_ftype_di_v4hi_v4hi
17786 = build_function_type (long_long_unsigned_type_node
,
17787 tree_cons (NULL_TREE
,
17788 long_long_unsigned_type_node
,
17789 tree_cons (NULL_TREE
, V4HI_type_node
,
17790 tree_cons (NULL_TREE
,
17794 tree di_ftype_v4hi_v4hi
17795 = build_function_type (long_long_unsigned_type_node
,
17796 tree_cons (NULL_TREE
, V4HI_type_node
,
17797 tree_cons (NULL_TREE
, V4HI_type_node
,
17800 /* Normal vector binops. */
17801 tree v8qi_ftype_v8qi_v8qi
17802 = build_function_type (V8QI_type_node
,
17803 tree_cons (NULL_TREE
, V8QI_type_node
,
17804 tree_cons (NULL_TREE
, V8QI_type_node
,
17806 tree v4hi_ftype_v4hi_v4hi
17807 = build_function_type (V4HI_type_node
,
17808 tree_cons (NULL_TREE
, V4HI_type_node
,
17809 tree_cons (NULL_TREE
, V4HI_type_node
,
17811 tree v2si_ftype_v2si_v2si
17812 = build_function_type (V2SI_type_node
,
17813 tree_cons (NULL_TREE
, V2SI_type_node
,
17814 tree_cons (NULL_TREE
, V2SI_type_node
,
17816 tree di_ftype_di_di
17817 = build_function_type (long_long_unsigned_type_node
,
17818 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
17819 tree_cons (NULL_TREE
,
17820 long_long_unsigned_type_node
,
17823 /* Add all builtins that are more or less simple operations on two
17825 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
17827 /* Use one of the operands; the target can have a different mode for
17828 mask-generating compares. */
17829 enum machine_mode mode
;
17835 mode
= insn_data
[d
->icode
].operand
[1].mode
;
17840 type
= v8qi_ftype_v8qi_v8qi
;
17843 type
= v4hi_ftype_v4hi_v4hi
;
17846 type
= v2si_ftype_v2si_v2si
;
17849 type
= di_ftype_di_di
;
17853 gcc_unreachable ();
17856 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
17859 /* Add the remaining MMX insns with somewhat more complicated types. */
17860 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wzero", di_ftype_void
, ARM_BUILTIN_WZERO
);
17861 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_setwcx", void_ftype_int_int
, ARM_BUILTIN_SETWCX
);
17862 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_getwcx", int_ftype_int
, ARM_BUILTIN_GETWCX
);
17864 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WSLLH
);
17865 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllw", v2si_ftype_v2si_di
, ARM_BUILTIN_WSLLW
);
17866 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wslld", di_ftype_di_di
, ARM_BUILTIN_WSLLD
);
17867 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSLLHI
);
17868 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllwi", v2si_ftype_v2si_int
, ARM_BUILTIN_WSLLWI
);
17869 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wslldi", di_ftype_di_int
, ARM_BUILTIN_WSLLDI
);
17871 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WSRLH
);
17872 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlw", v2si_ftype_v2si_di
, ARM_BUILTIN_WSRLW
);
17873 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrld", di_ftype_di_di
, ARM_BUILTIN_WSRLD
);
17874 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSRLHI
);
17875 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int
, ARM_BUILTIN_WSRLWI
);
17876 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrldi", di_ftype_di_int
, ARM_BUILTIN_WSRLDI
);
17878 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WSRAH
);
17879 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsraw", v2si_ftype_v2si_di
, ARM_BUILTIN_WSRAW
);
17880 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrad", di_ftype_di_di
, ARM_BUILTIN_WSRAD
);
17881 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSRAHI
);
17882 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrawi", v2si_ftype_v2si_int
, ARM_BUILTIN_WSRAWI
);
17883 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsradi", di_ftype_di_int
, ARM_BUILTIN_WSRADI
);
17885 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WRORH
);
17886 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorw", v2si_ftype_v2si_di
, ARM_BUILTIN_WRORW
);
17887 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrord", di_ftype_di_di
, ARM_BUILTIN_WRORD
);
17888 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WRORHI
);
17889 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorwi", v2si_ftype_v2si_int
, ARM_BUILTIN_WRORWI
);
17890 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrordi", di_ftype_di_int
, ARM_BUILTIN_WRORDI
);
17892 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSHUFH
);
17894 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi
, ARM_BUILTIN_WSADB
);
17895 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi
, ARM_BUILTIN_WSADH
);
17896 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi
, ARM_BUILTIN_WSADBZ
);
17897 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi
, ARM_BUILTIN_WSADHZ
);
17899 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmsb", int_ftype_v8qi_int
, ARM_BUILTIN_TEXTRMSB
);
17900 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmsh", int_ftype_v4hi_int
, ARM_BUILTIN_TEXTRMSH
);
17901 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmsw", int_ftype_v2si_int
, ARM_BUILTIN_TEXTRMSW
);
17902 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmub", int_ftype_v8qi_int
, ARM_BUILTIN_TEXTRMUB
);
17903 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmuh", int_ftype_v4hi_int
, ARM_BUILTIN_TEXTRMUH
);
17904 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmuw", int_ftype_v2si_int
, ARM_BUILTIN_TEXTRMUW
);
17905 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int
, ARM_BUILTIN_TINSRB
);
17906 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int
, ARM_BUILTIN_TINSRH
);
17907 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int
, ARM_BUILTIN_TINSRW
);
17909 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_waccb", di_ftype_v8qi
, ARM_BUILTIN_WACCB
);
17910 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wacch", di_ftype_v4hi
, ARM_BUILTIN_WACCH
);
17911 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_waccw", di_ftype_v2si
, ARM_BUILTIN_WACCW
);
17913 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmovmskb", int_ftype_v8qi
, ARM_BUILTIN_TMOVMSKB
);
17914 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmovmskh", int_ftype_v4hi
, ARM_BUILTIN_TMOVMSKH
);
17915 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmovmskw", int_ftype_v2si
, ARM_BUILTIN_TMOVMSKW
);
17917 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi
, ARM_BUILTIN_WPACKHSS
);
17918 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi
, ARM_BUILTIN_WPACKHUS
);
17919 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si
, ARM_BUILTIN_WPACKWUS
);
17920 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si
, ARM_BUILTIN_WPACKWSS
);
17921 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackdus", v2si_ftype_di_di
, ARM_BUILTIN_WPACKDUS
);
17922 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackdss", v2si_ftype_di_di
, ARM_BUILTIN_WPACKDSS
);
17924 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKEHUB
);
17925 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKEHUH
);
17926 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehuw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKEHUW
);
17927 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKEHSB
);
17928 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKEHSH
);
17929 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehsw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKEHSW
);
17930 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKELUB
);
17931 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKELUH
);
17932 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckeluw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKELUW
);
17933 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKELSB
);
17934 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKELSH
);
17935 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelsw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKELSW
);
17937 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi
, ARM_BUILTIN_WMACS
);
17938 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi
, ARM_BUILTIN_WMACSZ
);
17939 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi
, ARM_BUILTIN_WMACU
);
17940 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi
, ARM_BUILTIN_WMACUZ
);
17942 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int
, ARM_BUILTIN_WALIGN
);
17943 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmia", di_ftype_di_int_int
, ARM_BUILTIN_TMIA
);
17944 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiaph", di_ftype_di_int_int
, ARM_BUILTIN_TMIAPH
);
17945 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiabb", di_ftype_di_int_int
, ARM_BUILTIN_TMIABB
);
17946 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiabt", di_ftype_di_int_int
, ARM_BUILTIN_TMIABT
);
17947 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiatb", di_ftype_di_int_int
, ARM_BUILTIN_TMIATB
);
17948 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiatt", di_ftype_di_int_int
, ARM_BUILTIN_TMIATT
);
17952 arm_init_tls_builtins (void)
17956 ftype
= build_function_type (ptr_type_node
, void_list_node
);
17957 decl
= add_builtin_function ("__builtin_thread_pointer", ftype
,
17958 ARM_BUILTIN_THREAD_POINTER
, BUILT_IN_MD
,
17960 TREE_NOTHROW (decl
) = 1;
17961 TREE_READONLY (decl
) = 1;
/* Bit flags naming the Neon "key" modes a builtin variant exists for.
   NOTE(review): the enumerator body and the di_UP/ti_UP/ei_UP/oi_UP and
   T_MAX definitions were dropped by the extraction; the values below are
   reconstructed (one bit per mode, in the increasing order the table
   comment later in the file requires) — confirm against the original.  */
enum neon_builtin_type_bits
{
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP	 T_TI
#define ei_UP	 T_EI
#define oi_UP	 T_OI

/* Number of distinct T_* mode bits above.  */
#define T_MAX 13

#define UP(X) X##_UP
18029 NEON_LOADSTRUCTLANE
,
18031 NEON_STORESTRUCTLANE
,
18040 const neon_itype itype
;
18042 const enum insn_code codes
[T_MAX
];
18043 const unsigned int num_vars
;
18044 unsigned int base_fcode
;
18045 } neon_builtin_datum
;
/* CF(N,X) names the insn code for Neon pattern N with mode suffix X.
   VARn(T, N, A...) expands to the fields of one neon_builtin_datum with n
   mode variants.  NOTE(review): the dropped tails of VAR7 (`CF (N, G) },
   7, 0') and VAR8 (`| UP (H),') were restored; confirm.  */
#define CF(N,X) CODE_FOR_neon_##N##X
#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
                | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
18085 /* The mode entries in the following table correspond to the "key" type of the
18086 instruction variant, i.e. equivalent to that which would be specified after
18087 the assembler mnemonic, which usually refers to the last vector operand.
18088 (Signed/unsigned/polynomial types are not differentiated between though, and
18089 are all mapped onto the same mode for a given element size.) The modes
18090 listed per instruction should be the same as those defined for that
18091 instruction's pattern in neon.md.
18092 WARNING: Variants should be listed in the same increasing order as
18093 neon_builtin_type_bits. */
18095 static neon_builtin_datum neon_builtin_data
[] =
18097 { VAR10 (BINOP
, vadd
,
18098 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18099 { VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
) },
18100 { VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
) },
18101 { VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18102 { VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18103 { VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
) },
18104 { VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18105 { VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18106 { VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
) },
18107 { VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18108 { VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
) },
18109 { VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
) },
18110 { VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
) },
18111 { VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
) },
18112 { VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
) },
18113 { VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
) },
18114 { VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
) },
18115 { VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
) },
18116 { VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
) },
18117 { VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
) },
18118 { VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
) },
18119 { VAR2 (BINOP
, vqdmull
, v4hi
, v2si
) },
18120 { VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18121 { VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18122 { VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18123 { VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
) },
18124 { VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
) },
18125 { VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
) },
18126 { VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18127 { VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18128 { VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18129 { VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
) },
18130 { VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18131 { VAR10 (BINOP
, vsub
,
18132 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18133 { VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
) },
18134 { VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
) },
18135 { VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18136 { VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18137 { VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
) },
18138 { VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18139 { VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18140 { VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18141 { VAR2 (BINOP
, vcage
, v2sf
, v4sf
) },
18142 { VAR2 (BINOP
, vcagt
, v2sf
, v4sf
) },
18143 { VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18144 { VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18145 { VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
) },
18146 { VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18147 { VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
) },
18148 { VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18149 { VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18150 { VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
) },
18151 { VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18152 { VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18153 { VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
) },
18154 { VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
) },
18155 { VAR2 (BINOP
, vrecps
, v2sf
, v4sf
) },
18156 { VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
) },
18157 { VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18158 { VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
18159 { VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18160 { VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18161 { VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18162 { VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18163 { VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18164 { VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18165 { VAR2 (UNOP
, vcnt
, v8qi
, v16qi
) },
18166 { VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
) },
18167 { VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
) },
18168 { VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
18169 /* FIXME: vget_lane supports more variants than this! */
18170 { VAR10 (GETLANE
, vget_lane
,
18171 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18172 { VAR10 (SETLANE
, vset_lane
,
18173 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18174 { VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18175 { VAR10 (DUP
, vdup_n
,
18176 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18177 { VAR10 (DUPLANE
, vdup_lane
,
18178 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18179 { VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18180 { VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18181 { VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18182 { VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
) },
18183 { VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
) },
18184 { VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
) },
18185 { VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
) },
18186 { VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18187 { VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18188 { VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
) },
18189 { VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
) },
18190 { VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18191 { VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
) },
18192 { VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
) },
18193 { VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18194 { VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18195 { VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
) },
18196 { VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
) },
18197 { VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18198 { VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
) },
18199 { VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
) },
18200 { VAR10 (BINOP
, vext
,
18201 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18202 { VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18203 { VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
) },
18204 { VAR2 (UNOP
, vrev16
, v8qi
, v16qi
) },
18205 { VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
) },
18206 { VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
) },
18207 { VAR10 (SELECT
, vbsl
,
18208 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18209 { VAR1 (VTBL
, vtbl1
, v8qi
) },
18210 { VAR1 (VTBL
, vtbl2
, v8qi
) },
18211 { VAR1 (VTBL
, vtbl3
, v8qi
) },
18212 { VAR1 (VTBL
, vtbl4
, v8qi
) },
18213 { VAR1 (VTBX
, vtbx1
, v8qi
) },
18214 { VAR1 (VTBX
, vtbx2
, v8qi
) },
18215 { VAR1 (VTBX
, vtbx3
, v8qi
) },
18216 { VAR1 (VTBX
, vtbx4
, v8qi
) },
18217 { VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18218 { VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18219 { VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
18220 { VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18221 { VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18222 { VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18223 { VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18224 { VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18225 { VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18226 { VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18227 { VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18228 { VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18229 { VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18230 { VAR10 (LOAD1
, vld1
,
18231 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18232 { VAR10 (LOAD1LANE
, vld1_lane
,
18233 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18234 { VAR10 (LOAD1
, vld1_dup
,
18235 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18236 { VAR10 (STORE1
, vst1
,
18237 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18238 { VAR10 (STORE1LANE
, vst1_lane
,
18239 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18240 { VAR9 (LOADSTRUCT
,
18241 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
18242 { VAR7 (LOADSTRUCTLANE
, vld2_lane
,
18243 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18244 { VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18245 { VAR9 (STORESTRUCT
, vst2
,
18246 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
18247 { VAR7 (STORESTRUCTLANE
, vst2_lane
,
18248 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18249 { VAR9 (LOADSTRUCT
,
18250 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
18251 { VAR7 (LOADSTRUCTLANE
, vld3_lane
,
18252 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18253 { VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18254 { VAR9 (STORESTRUCT
, vst3
,
18255 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
18256 { VAR7 (STORESTRUCTLANE
, vst3_lane
,
18257 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18258 { VAR9 (LOADSTRUCT
, vld4
,
18259 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
18260 { VAR7 (LOADSTRUCTLANE
, vld4_lane
,
18261 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18262 { VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
18263 { VAR9 (STORESTRUCT
, vst4
,
18264 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
18265 { VAR7 (STORESTRUCTLANE
, vst4_lane
,
18266 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
18267 { VAR10 (LOGICBINOP
, vand
,
18268 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18269 { VAR10 (LOGICBINOP
, vorr
,
18270 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18271 { VAR10 (BINOP
, veor
,
18272 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18273 { VAR10 (LOGICBINOP
, vbic
,
18274 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
18275 { VAR10 (LOGICBINOP
, vorn
,
18276 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) }
18292 arm_init_neon_builtins (void)
18294 unsigned int i
, fcode
= ARM_BUILTIN_NEON_BASE
;
18296 tree neon_intQI_type_node
;
18297 tree neon_intHI_type_node
;
18298 tree neon_polyQI_type_node
;
18299 tree neon_polyHI_type_node
;
18300 tree neon_intSI_type_node
;
18301 tree neon_intDI_type_node
;
18302 tree neon_float_type_node
;
18304 tree intQI_pointer_node
;
18305 tree intHI_pointer_node
;
18306 tree intSI_pointer_node
;
18307 tree intDI_pointer_node
;
18308 tree float_pointer_node
;
18310 tree const_intQI_node
;
18311 tree const_intHI_node
;
18312 tree const_intSI_node
;
18313 tree const_intDI_node
;
18314 tree const_float_node
;
18316 tree const_intQI_pointer_node
;
18317 tree const_intHI_pointer_node
;
18318 tree const_intSI_pointer_node
;
18319 tree const_intDI_pointer_node
;
18320 tree const_float_pointer_node
;
18322 tree V8QI_type_node
;
18323 tree V4HI_type_node
;
18324 tree V2SI_type_node
;
18325 tree V2SF_type_node
;
18326 tree V16QI_type_node
;
18327 tree V8HI_type_node
;
18328 tree V4SI_type_node
;
18329 tree V4SF_type_node
;
18330 tree V2DI_type_node
;
18332 tree intUQI_type_node
;
18333 tree intUHI_type_node
;
18334 tree intUSI_type_node
;
18335 tree intUDI_type_node
;
18337 tree intEI_type_node
;
18338 tree intOI_type_node
;
18339 tree intCI_type_node
;
18340 tree intXI_type_node
;
18342 tree V8QI_pointer_node
;
18343 tree V4HI_pointer_node
;
18344 tree V2SI_pointer_node
;
18345 tree V2SF_pointer_node
;
18346 tree V16QI_pointer_node
;
18347 tree V8HI_pointer_node
;
18348 tree V4SI_pointer_node
;
18349 tree V4SF_pointer_node
;
18350 tree V2DI_pointer_node
;
18352 tree void_ftype_pv8qi_v8qi_v8qi
;
18353 tree void_ftype_pv4hi_v4hi_v4hi
;
18354 tree void_ftype_pv2si_v2si_v2si
;
18355 tree void_ftype_pv2sf_v2sf_v2sf
;
18356 tree void_ftype_pdi_di_di
;
18357 tree void_ftype_pv16qi_v16qi_v16qi
;
18358 tree void_ftype_pv8hi_v8hi_v8hi
;
18359 tree void_ftype_pv4si_v4si_v4si
;
18360 tree void_ftype_pv4sf_v4sf_v4sf
;
18361 tree void_ftype_pv2di_v2di_v2di
;
18363 tree reinterp_ftype_dreg
[5][5];
18364 tree reinterp_ftype_qreg
[5][5];
18365 tree dreg_types
[5], qreg_types
[5];
18367 /* Create distinguished type nodes for NEON vector element types,
18368 and pointers to values of such types, so we can detect them later. */
18369 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18370 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18371 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18372 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18373 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
18374 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
18375 neon_float_type_node
= make_node (REAL_TYPE
);
18376 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
18377 layout_type (neon_float_type_node
);
18379 /* Define typedefs which exactly correspond to the modes we are basing vector
18380 types on. If you change these names you'll need to change
18381 the table used by arm_mangle_type too. */
18382 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
18383 "__builtin_neon_qi");
18384 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
18385 "__builtin_neon_hi");
18386 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
18387 "__builtin_neon_si");
18388 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
18389 "__builtin_neon_sf");
18390 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
18391 "__builtin_neon_di");
18392 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
18393 "__builtin_neon_poly8");
18394 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
18395 "__builtin_neon_poly16");
18397 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
18398 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
18399 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
18400 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
18401 float_pointer_node
= build_pointer_type (neon_float_type_node
);
18403 /* Next create constant-qualified versions of the above types. */
18404 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
18406 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
18408 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
18410 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
18412 const_float_node
= build_qualified_type (neon_float_type_node
,
18415 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
18416 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
18417 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
18418 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
18419 const_float_pointer_node
= build_pointer_type (const_float_node
);
18421 /* Now create vector types based on our NEON element types. */
18422 /* 64-bit vectors. */
18424 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
18426 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
18428 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
18430 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
18431 /* 128-bit vectors. */
18433 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
18435 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
18437 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
18439 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
18441 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
18443 /* Unsigned integer types for various mode sizes. */
18444 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
18445 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
18446 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
18447 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
18449 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
18450 "__builtin_neon_uqi");
18451 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
18452 "__builtin_neon_uhi");
18453 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
18454 "__builtin_neon_usi");
18455 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
18456 "__builtin_neon_udi");
18458 /* Opaque integer types for structures of vectors. */
18459 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
18460 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
18461 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
18462 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
18464 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
18465 "__builtin_neon_ti");
18466 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
18467 "__builtin_neon_ei");
18468 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
18469 "__builtin_neon_oi");
18470 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
18471 "__builtin_neon_ci");
18472 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
18473 "__builtin_neon_xi");
18475 /* Pointers to vector types. */
18476 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
18477 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
18478 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
18479 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
18480 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
18481 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
18482 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
18483 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
18484 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
18486 /* Operations which return results as pairs. */
18487 void_ftype_pv8qi_v8qi_v8qi
=
18488 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
18489 V8QI_type_node
, NULL
);
18490 void_ftype_pv4hi_v4hi_v4hi
=
18491 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
18492 V4HI_type_node
, NULL
);
18493 void_ftype_pv2si_v2si_v2si
=
18494 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
18495 V2SI_type_node
, NULL
);
18496 void_ftype_pv2sf_v2sf_v2sf
=
18497 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
18498 V2SF_type_node
, NULL
);
18499 void_ftype_pdi_di_di
=
18500 build_function_type_list (void_type_node
, intDI_pointer_node
,
18501 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
18502 void_ftype_pv16qi_v16qi_v16qi
=
18503 build_function_type_list (void_type_node
, V16QI_pointer_node
,
18504 V16QI_type_node
, V16QI_type_node
, NULL
);
18505 void_ftype_pv8hi_v8hi_v8hi
=
18506 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
18507 V8HI_type_node
, NULL
);
18508 void_ftype_pv4si_v4si_v4si
=
18509 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
18510 V4SI_type_node
, NULL
);
18511 void_ftype_pv4sf_v4sf_v4sf
=
18512 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
18513 V4SF_type_node
, NULL
);
18514 void_ftype_pv2di_v2di_v2di
=
18515 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
18516 V2DI_type_node
, NULL
);
18518 dreg_types
[0] = V8QI_type_node
;
18519 dreg_types
[1] = V4HI_type_node
;
18520 dreg_types
[2] = V2SI_type_node
;
18521 dreg_types
[3] = V2SF_type_node
;
18522 dreg_types
[4] = neon_intDI_type_node
;
18524 qreg_types
[0] = V16QI_type_node
;
18525 qreg_types
[1] = V8HI_type_node
;
18526 qreg_types
[2] = V4SI_type_node
;
18527 qreg_types
[3] = V4SF_type_node
;
18528 qreg_types
[4] = V2DI_type_node
;
18530 for (i
= 0; i
< 5; i
++)
18533 for (j
= 0; j
< 5; j
++)
18535 reinterp_ftype_dreg
[i
][j
]
18536 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
18537 reinterp_ftype_qreg
[i
][j
]
18538 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
18542 for (i
= 0; i
< ARRAY_SIZE (neon_builtin_data
); i
++)
18544 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
18545 unsigned int j
, codeidx
= 0;
18547 d
->base_fcode
= fcode
;
18549 for (j
= 0; j
< T_MAX
; j
++)
18551 const char* const modenames
[] = {
18552 "v8qi", "v4hi", "v2si", "v2sf", "di",
18553 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
18557 enum insn_code icode
;
18558 int is_load
= 0, is_store
= 0;
18560 if ((d
->bits
& (1 << j
)) == 0)
18563 icode
= d
->codes
[codeidx
++];
18568 case NEON_LOAD1LANE
:
18569 case NEON_LOADSTRUCT
:
18570 case NEON_LOADSTRUCTLANE
:
18572 /* Fall through. */
18574 case NEON_STORE1LANE
:
18575 case NEON_STORESTRUCT
:
18576 case NEON_STORESTRUCTLANE
:
18579 /* Fall through. */
18582 case NEON_LOGICBINOP
:
18583 case NEON_SHIFTINSERT
:
18590 case NEON_SHIFTIMM
:
18591 case NEON_SHIFTACC
:
18597 case NEON_LANEMULL
:
18598 case NEON_LANEMULH
:
18600 case NEON_SCALARMUL
:
18601 case NEON_SCALARMULL
:
18602 case NEON_SCALARMULH
:
18603 case NEON_SCALARMAC
:
18609 tree return_type
= void_type_node
, args
= void_list_node
;
18611 /* Build a function type directly from the insn_data for this
18612 builtin. The build_function_type() function takes care of
18613 removing duplicates for us. */
18614 for (k
= insn_data
[icode
].n_operands
- 1; k
>= 0; k
--)
18618 if (is_load
&& k
== 1)
18620 /* Neon load patterns always have the memory operand
18621 (a SImode pointer) in the operand 1 position. We
18622 want a const pointer to the element type in that
18624 gcc_assert (insn_data
[icode
].operand
[k
].mode
== SImode
);
18630 eltype
= const_intQI_pointer_node
;
18635 eltype
= const_intHI_pointer_node
;
18640 eltype
= const_intSI_pointer_node
;
18645 eltype
= const_float_pointer_node
;
18650 eltype
= const_intDI_pointer_node
;
18653 default: gcc_unreachable ();
18656 else if (is_store
&& k
== 0)
18658 /* Similarly, Neon store patterns use operand 0 as
18659 the memory location to store to (a SImode pointer).
18660 Use a pointer to the element type of the store in
18662 gcc_assert (insn_data
[icode
].operand
[k
].mode
== SImode
);
18668 eltype
= intQI_pointer_node
;
18673 eltype
= intHI_pointer_node
;
18678 eltype
= intSI_pointer_node
;
18683 eltype
= float_pointer_node
;
18688 eltype
= intDI_pointer_node
;
18691 default: gcc_unreachable ();
18696 switch (insn_data
[icode
].operand
[k
].mode
)
18698 case VOIDmode
: eltype
= void_type_node
; break;
18700 case QImode
: eltype
= neon_intQI_type_node
; break;
18701 case HImode
: eltype
= neon_intHI_type_node
; break;
18702 case SImode
: eltype
= neon_intSI_type_node
; break;
18703 case SFmode
: eltype
= neon_float_type_node
; break;
18704 case DImode
: eltype
= neon_intDI_type_node
; break;
18705 case TImode
: eltype
= intTI_type_node
; break;
18706 case EImode
: eltype
= intEI_type_node
; break;
18707 case OImode
: eltype
= intOI_type_node
; break;
18708 case CImode
: eltype
= intCI_type_node
; break;
18709 case XImode
: eltype
= intXI_type_node
; break;
18710 /* 64-bit vectors. */
18711 case V8QImode
: eltype
= V8QI_type_node
; break;
18712 case V4HImode
: eltype
= V4HI_type_node
; break;
18713 case V2SImode
: eltype
= V2SI_type_node
; break;
18714 case V2SFmode
: eltype
= V2SF_type_node
; break;
18715 /* 128-bit vectors. */
18716 case V16QImode
: eltype
= V16QI_type_node
; break;
18717 case V8HImode
: eltype
= V8HI_type_node
; break;
18718 case V4SImode
: eltype
= V4SI_type_node
; break;
18719 case V4SFmode
: eltype
= V4SF_type_node
; break;
18720 case V2DImode
: eltype
= V2DI_type_node
; break;
18721 default: gcc_unreachable ();
18725 if (k
== 0 && !is_store
)
18726 return_type
= eltype
;
18728 args
= tree_cons (NULL_TREE
, eltype
, args
);
18731 ftype
= build_function_type (return_type
, args
);
18735 case NEON_RESULTPAIR
:
18737 switch (insn_data
[icode
].operand
[1].mode
)
18739 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
18740 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
18741 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
18742 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
18743 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
18744 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
18745 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
18746 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
18747 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
18748 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
18749 default: gcc_unreachable ();
18754 case NEON_REINTERP
:
18756 /* We iterate over 5 doubleword types, then 5 quadword
18759 switch (insn_data
[icode
].operand
[0].mode
)
18761 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
18762 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
18763 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
18764 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
18765 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
18766 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
18767 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
18768 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
18769 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
18770 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
18771 default: gcc_unreachable ();
18777 gcc_unreachable ();
18780 gcc_assert (ftype
!= NULL
);
18782 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[j
]);
18784 add_builtin_function (namebuf
, ftype
, fcode
++, BUILT_IN_MD
, NULL
,
18791 arm_init_fp16_builtins (void)
18793 tree fp16_type
= make_node (REAL_TYPE
);
18794 TYPE_PRECISION (fp16_type
) = 16;
18795 layout_type (fp16_type
);
18796 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
18800 arm_init_builtins (void)
18802 arm_init_tls_builtins ();
18804 if (TARGET_REALLY_IWMMXT
)
18805 arm_init_iwmmxt_builtins ();
18808 arm_init_neon_builtins ();
18810 if (arm_fp16_format
)
18811 arm_init_fp16_builtins ();
18814 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18816 static const char *
18817 arm_invalid_parameter_type (const_tree t
)
18819 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
18820 return N_("function parameters cannot have __fp16 type");
18824 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
18826 static const char *
18827 arm_invalid_return_type (const_tree t
)
18829 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
18830 return N_("functions cannot return __fp16 type");
18834 /* Implement TARGET_PROMOTED_TYPE. */
18837 arm_promoted_type (const_tree t
)
18839 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
18840 return float_type_node
;
18844 /* Implement TARGET_CONVERT_TO_TYPE.
18845 Specifically, this hook implements the peculiarity of the ARM
18846 half-precision floating-point C semantics that requires conversions between
18847 __fp16 to or from double to do an intermediate conversion to float. */
18850 arm_convert_to_type (tree type
, tree expr
)
18852 tree fromtype
= TREE_TYPE (expr
);
18853 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
18855 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
18856 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
18857 return convert (type
, convert (float_type_node
, expr
));
18861 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
18862 This simply adds HFmode as a supported mode; even though we don't
18863 implement arithmetic on this type directly, it's supported by
18864 optabs conversions, much the way the double-word arithmetic is
18865 special-cased in the default hook. */
18868 arm_scalar_mode_supported_p (enum machine_mode mode
)
18870 if (mode
== HFmode
)
18871 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
18873 return default_scalar_mode_supported_p (mode
);
18876 /* Errors in the source file can cause expand_expr to return const0_rtx
18877 where we expect a vector. To avoid crashing, use one of the vector
18878 clear instructions. */
18881 safe_vector_operand (rtx x
, enum machine_mode mode
)
18883 if (x
!= const0_rtx
)
18885 x
= gen_reg_rtx (mode
);
18887 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
18888 : gen_rtx_SUBREG (DImode
, x
, 0)));
18892 /* Subroutine of arm_expand_builtin to take care of binop insns. */
18895 arm_expand_binop_builtin (enum insn_code icode
,
18896 tree exp
, rtx target
)
18899 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18900 tree arg1
= CALL_EXPR_ARG (exp
, 1);
18901 rtx op0
= expand_normal (arg0
);
18902 rtx op1
= expand_normal (arg1
);
18903 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18904 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18905 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
18907 if (VECTOR_MODE_P (mode0
))
18908 op0
= safe_vector_operand (op0
, mode0
);
18909 if (VECTOR_MODE_P (mode1
))
18910 op1
= safe_vector_operand (op1
, mode1
);
18913 || GET_MODE (target
) != tmode
18914 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18915 target
= gen_reg_rtx (tmode
);
18917 gcc_assert (GET_MODE (op0
) == mode0
&& GET_MODE (op1
) == mode1
);
18919 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18920 op0
= copy_to_mode_reg (mode0
, op0
);
18921 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18922 op1
= copy_to_mode_reg (mode1
, op1
);
18924 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18931 /* Subroutine of arm_expand_builtin to take care of unop insns. */
18934 arm_expand_unop_builtin (enum insn_code icode
,
18935 tree exp
, rtx target
, int do_load
)
18938 tree arg0
= CALL_EXPR_ARG (exp
, 0);
18939 rtx op0
= expand_normal (arg0
);
18940 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18941 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
18944 || GET_MODE (target
) != tmode
18945 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18946 target
= gen_reg_rtx (tmode
);
18948 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
18951 if (VECTOR_MODE_P (mode0
))
18952 op0
= safe_vector_operand (op0
, mode0
);
18954 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18955 op0
= copy_to_mode_reg (mode0
, op0
);
18958 pat
= GEN_FCN (icode
) (target
, op0
);
18966 neon_builtin_compare (const void *a
, const void *b
)
18968 const neon_builtin_datum
*const key
= (const neon_builtin_datum
*) a
;
18969 const neon_builtin_datum
*const memb
= (const neon_builtin_datum
*) b
;
18970 unsigned int soughtcode
= key
->base_fcode
;
18972 if (soughtcode
>= memb
->base_fcode
18973 && soughtcode
< memb
->base_fcode
+ memb
->num_vars
)
18975 else if (soughtcode
< memb
->base_fcode
)
18981 static enum insn_code
18982 locate_neon_builtin_icode (int fcode
, neon_itype
*itype
)
18984 neon_builtin_datum key
, *found
;
18987 key
.base_fcode
= fcode
;
18988 found
= (neon_builtin_datum
*)
18989 bsearch (&key
, &neon_builtin_data
[0], ARRAY_SIZE (neon_builtin_data
),
18990 sizeof (neon_builtin_data
[0]), neon_builtin_compare
);
18991 gcc_assert (found
);
18992 idx
= fcode
- (int) found
->base_fcode
;
18993 gcc_assert (idx
>= 0 && idx
< T_MAX
&& idx
< (int)found
->num_vars
);
18996 *itype
= found
->itype
;
18998 return found
->codes
[idx
];
19002 NEON_ARG_COPY_TO_REG
,
19007 #define NEON_MAX_BUILTIN_ARGS 5
19009 /* Expand a Neon builtin. */
19011 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
19016 tree arg
[NEON_MAX_BUILTIN_ARGS
];
19017 rtx op
[NEON_MAX_BUILTIN_ARGS
];
19018 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19019 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
19024 || GET_MODE (target
) != tmode
19025 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
19026 target
= gen_reg_rtx (tmode
);
19028 va_start (ap
, exp
);
19032 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
19034 if (thisarg
== NEON_ARG_STOP
)
19038 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
19039 op
[argc
] = expand_normal (arg
[argc
]);
19040 mode
[argc
] = insn_data
[icode
].operand
[argc
+ have_retval
].mode
;
19044 case NEON_ARG_COPY_TO_REG
:
19045 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19046 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
19047 (op
[argc
], mode
[argc
]))
19048 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
19051 case NEON_ARG_CONSTANT
:
19052 /* FIXME: This error message is somewhat unhelpful. */
19053 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
19054 (op
[argc
], mode
[argc
]))
19055 error ("argument must be a constant");
19058 case NEON_ARG_STOP
:
19059 gcc_unreachable ();
19072 pat
= GEN_FCN (icode
) (target
, op
[0]);
19076 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
19080 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
19084 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
19088 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
19092 gcc_unreachable ();
19098 pat
= GEN_FCN (icode
) (op
[0]);
19102 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
19106 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
19110 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
19114 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
19118 gcc_unreachable ();
19129 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19130 constants defined per-instruction or per instruction-variant. Instead, the
19131 required info is looked up in the table neon_builtin_data. */
19133 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
19136 enum insn_code icode
= locate_neon_builtin_icode (fcode
, &itype
);
19143 return arm_expand_neon_args (target
, icode
, 1, exp
,
19144 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19148 case NEON_SCALARMUL
:
19149 case NEON_SCALARMULL
:
19150 case NEON_SCALARMULH
:
19151 case NEON_SHIFTINSERT
:
19152 case NEON_LOGICBINOP
:
19153 return arm_expand_neon_args (target
, icode
, 1, exp
,
19154 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19158 return arm_expand_neon_args (target
, icode
, 1, exp
,
19159 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19160 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19164 case NEON_SHIFTIMM
:
19165 return arm_expand_neon_args (target
, icode
, 1, exp
,
19166 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
19170 return arm_expand_neon_args (target
, icode
, 1, exp
,
19171 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19175 case NEON_REINTERP
:
19176 return arm_expand_neon_args (target
, icode
, 1, exp
,
19177 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19181 return arm_expand_neon_args (target
, icode
, 1, exp
,
19182 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19184 case NEON_RESULTPAIR
:
19185 return arm_expand_neon_args (target
, icode
, 0, exp
,
19186 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19190 case NEON_LANEMULL
:
19191 case NEON_LANEMULH
:
19192 return arm_expand_neon_args (target
, icode
, 1, exp
,
19193 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19194 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19197 return arm_expand_neon_args (target
, icode
, 1, exp
,
19198 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19199 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19201 case NEON_SHIFTACC
:
19202 return arm_expand_neon_args (target
, icode
, 1, exp
,
19203 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19204 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19206 case NEON_SCALARMAC
:
19207 return arm_expand_neon_args (target
, icode
, 1, exp
,
19208 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19209 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19213 return arm_expand_neon_args (target
, icode
, 1, exp
,
19214 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19218 case NEON_LOADSTRUCT
:
19219 return arm_expand_neon_args (target
, icode
, 1, exp
,
19220 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19222 case NEON_LOAD1LANE
:
19223 case NEON_LOADSTRUCTLANE
:
19224 return arm_expand_neon_args (target
, icode
, 1, exp
,
19225 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19229 case NEON_STORESTRUCT
:
19230 return arm_expand_neon_args (target
, icode
, 0, exp
,
19231 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19233 case NEON_STORE1LANE
:
19234 case NEON_STORESTRUCTLANE
:
19235 return arm_expand_neon_args (target
, icode
, 0, exp
,
19236 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19240 gcc_unreachable ();
19243 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19245 neon_reinterpret (rtx dest
, rtx src
)
19247 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
19250 /* Emit code to place a Neon pair result in memory locations (with equal
19253 neon_emit_pair_result_insn (enum machine_mode mode
,
19254 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
19257 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
19258 rtx tmp1
= gen_reg_rtx (mode
);
19259 rtx tmp2
= gen_reg_rtx (mode
);
19261 emit_insn (intfn (tmp1
, op1
, tmp2
, op2
));
19263 emit_move_insn (mem
, tmp1
);
19264 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
19265 emit_move_insn (mem
, tmp2
);
19268 /* Set up operands for a register copy from src to dest, taking care not to
19269 clobber registers in the process.
19270 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19271 be called with a large N, so that should be OK. */
19274 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
19276 unsigned int copied
= 0, opctr
= 0;
19277 unsigned int done
= (1 << count
) - 1;
19280 while (copied
!= done
)
19282 for (i
= 0; i
< count
; i
++)
19286 for (j
= 0; good
&& j
< count
; j
++)
19287 if (i
!= j
&& (copied
& (1 << j
)) == 0
19288 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
19293 operands
[opctr
++] = dest
[i
];
19294 operands
[opctr
++] = src
[i
];
19300 gcc_assert (opctr
== count
* 2);
19303 /* Expand an expression EXP that calls a built-in function,
19304 with result going to TARGET if that's convenient
19305 (and in mode MODE if that's convenient).
19306 SUBTARGET may be used as the target for computing one of EXP's operands.
19307 IGNORE is nonzero if the value is to be ignored. */
19310 arm_expand_builtin (tree exp
,
19312 rtx subtarget ATTRIBUTE_UNUSED
,
19313 enum machine_mode mode ATTRIBUTE_UNUSED
,
19314 int ignore ATTRIBUTE_UNUSED
)
19316 const struct builtin_description
* d
;
19317 enum insn_code icode
;
19318 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19326 int fcode
= DECL_FUNCTION_CODE (fndecl
);
19328 enum machine_mode tmode
;
19329 enum machine_mode mode0
;
19330 enum machine_mode mode1
;
19331 enum machine_mode mode2
;
19333 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
19334 return arm_expand_neon_builtin (fcode
, exp
, target
);
19338 case ARM_BUILTIN_TEXTRMSB
:
19339 case ARM_BUILTIN_TEXTRMUB
:
19340 case ARM_BUILTIN_TEXTRMSH
:
19341 case ARM_BUILTIN_TEXTRMUH
:
19342 case ARM_BUILTIN_TEXTRMSW
:
19343 case ARM_BUILTIN_TEXTRMUW
:
19344 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
19345 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
19346 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
19347 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
19348 : CODE_FOR_iwmmxt_textrmw
);
19350 arg0
= CALL_EXPR_ARG (exp
, 0);
19351 arg1
= CALL_EXPR_ARG (exp
, 1);
19352 op0
= expand_normal (arg0
);
19353 op1
= expand_normal (arg1
);
19354 tmode
= insn_data
[icode
].operand
[0].mode
;
19355 mode0
= insn_data
[icode
].operand
[1].mode
;
19356 mode1
= insn_data
[icode
].operand
[2].mode
;
19358 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19359 op0
= copy_to_mode_reg (mode0
, op0
);
19360 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19362 /* @@@ better error message */
19363 error ("selector must be an immediate");
19364 return gen_reg_rtx (tmode
);
19367 || GET_MODE (target
) != tmode
19368 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19369 target
= gen_reg_rtx (tmode
);
19370 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19376 case ARM_BUILTIN_TINSRB
:
19377 case ARM_BUILTIN_TINSRH
:
19378 case ARM_BUILTIN_TINSRW
:
19379 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
19380 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
19381 : CODE_FOR_iwmmxt_tinsrw
);
19382 arg0
= CALL_EXPR_ARG (exp
, 0);
19383 arg1
= CALL_EXPR_ARG (exp
, 1);
19384 arg2
= CALL_EXPR_ARG (exp
, 2);
19385 op0
= expand_normal (arg0
);
19386 op1
= expand_normal (arg1
);
19387 op2
= expand_normal (arg2
);
19388 tmode
= insn_data
[icode
].operand
[0].mode
;
19389 mode0
= insn_data
[icode
].operand
[1].mode
;
19390 mode1
= insn_data
[icode
].operand
[2].mode
;
19391 mode2
= insn_data
[icode
].operand
[3].mode
;
19393 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19394 op0
= copy_to_mode_reg (mode0
, op0
);
19395 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19396 op1
= copy_to_mode_reg (mode1
, op1
);
19397 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19399 /* @@@ better error message */
19400 error ("selector must be an immediate");
19404 || GET_MODE (target
) != tmode
19405 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19406 target
= gen_reg_rtx (tmode
);
19407 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19413 case ARM_BUILTIN_SETWCX
:
19414 arg0
= CALL_EXPR_ARG (exp
, 0);
19415 arg1
= CALL_EXPR_ARG (exp
, 1);
19416 op0
= force_reg (SImode
, expand_normal (arg0
));
19417 op1
= expand_normal (arg1
);
19418 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
19421 case ARM_BUILTIN_GETWCX
:
19422 arg0
= CALL_EXPR_ARG (exp
, 0);
19423 op0
= expand_normal (arg0
);
19424 target
= gen_reg_rtx (SImode
);
19425 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
19428 case ARM_BUILTIN_WSHUFH
:
19429 icode
= CODE_FOR_iwmmxt_wshufh
;
19430 arg0
= CALL_EXPR_ARG (exp
, 0);
19431 arg1
= CALL_EXPR_ARG (exp
, 1);
19432 op0
= expand_normal (arg0
);
19433 op1
= expand_normal (arg1
);
19434 tmode
= insn_data
[icode
].operand
[0].mode
;
19435 mode1
= insn_data
[icode
].operand
[1].mode
;
19436 mode2
= insn_data
[icode
].operand
[2].mode
;
19438 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19439 op0
= copy_to_mode_reg (mode1
, op0
);
19440 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19442 /* @@@ better error message */
19443 error ("mask must be an immediate");
19447 || GET_MODE (target
) != tmode
19448 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19449 target
= gen_reg_rtx (tmode
);
19450 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19456 case ARM_BUILTIN_WSADB
:
19457 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
19458 case ARM_BUILTIN_WSADH
:
19459 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
19460 case ARM_BUILTIN_WSADBZ
:
19461 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
19462 case ARM_BUILTIN_WSADHZ
:
19463 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
19465 /* Several three-argument builtins. */
19466 case ARM_BUILTIN_WMACS
:
19467 case ARM_BUILTIN_WMACU
:
19468 case ARM_BUILTIN_WALIGN
:
19469 case ARM_BUILTIN_TMIA
:
19470 case ARM_BUILTIN_TMIAPH
:
19471 case ARM_BUILTIN_TMIATT
:
19472 case ARM_BUILTIN_TMIATB
:
19473 case ARM_BUILTIN_TMIABT
:
19474 case ARM_BUILTIN_TMIABB
:
19475 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
19476 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
19477 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
19478 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
19479 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
19480 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
19481 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
19482 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
19483 : CODE_FOR_iwmmxt_walign
);
19484 arg0
= CALL_EXPR_ARG (exp
, 0);
19485 arg1
= CALL_EXPR_ARG (exp
, 1);
19486 arg2
= CALL_EXPR_ARG (exp
, 2);
19487 op0
= expand_normal (arg0
);
19488 op1
= expand_normal (arg1
);
19489 op2
= expand_normal (arg2
);
19490 tmode
= insn_data
[icode
].operand
[0].mode
;
19491 mode0
= insn_data
[icode
].operand
[1].mode
;
19492 mode1
= insn_data
[icode
].operand
[2].mode
;
19493 mode2
= insn_data
[icode
].operand
[3].mode
;
19495 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19496 op0
= copy_to_mode_reg (mode0
, op0
);
19497 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19498 op1
= copy_to_mode_reg (mode1
, op1
);
19499 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19500 op2
= copy_to_mode_reg (mode2
, op2
);
19502 || GET_MODE (target
) != tmode
19503 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19504 target
= gen_reg_rtx (tmode
);
19505 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19511 case ARM_BUILTIN_WZERO
:
19512 target
= gen_reg_rtx (DImode
);
19513 emit_insn (gen_iwmmxt_clrdi (target
));
19516 case ARM_BUILTIN_THREAD_POINTER
:
19517 return arm_load_tp (target
);
19523 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19524 if (d
->code
== (const enum arm_builtins
) fcode
)
19525 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
19527 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19528 if (d
->code
== (const enum arm_builtins
) fcode
)
19529 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19531 /* @@@ Should really do something sensible here. */
19535 /* Return the number (counting from 0) of
19536 the least significant set bit in MASK. */
19539 number_of_first_bit_set (unsigned mask
)
19544 (mask
& (1 << bit
)) == 0;
19551 /* Emit code to push or pop registers to or from the stack. F is the
19552 assembly file. MASK is the registers to push or pop. PUSH is
19553 nonzero if we should push, and zero if we should pop. For debugging
19554 output, if pushing, adjust CFA_OFFSET by the amount of space added
19555 to the stack. REAL_REGS should have the same number of bits set as
19556 MASK, and will be used instead (in the same order) to describe which
19557 registers were saved - this is used to mark the save slots when we
19558 push high registers after moving them to low registers. */
19560 thumb_pushpop (FILE *f
, unsigned long mask
, int push
, int *cfa_offset
,
19561 unsigned long real_regs
)
19564 int lo_mask
= mask
& 0xFF;
19565 int pushed_words
= 0;
19569 if (lo_mask
== 0 && !push
&& (mask
& (1 << PC_REGNUM
)))
19571 /* Special case. Do not generate a POP PC statement here, do it in
19573 thumb_exit (f
, -1);
19577 if (ARM_EABI_UNWIND_TABLES
&& push
)
19579 fprintf (f
, "\t.save\t{");
19580 for (regno
= 0; regno
< 15; regno
++)
19582 if (real_regs
& (1 << regno
))
19584 if (real_regs
& ((1 << regno
) -1))
19586 asm_fprintf (f
, "%r", regno
);
19589 fprintf (f
, "}\n");
19592 fprintf (f
, "\t%s\t{", push
? "push" : "pop");
19594 /* Look at the low registers first. */
19595 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
19599 asm_fprintf (f
, "%r", regno
);
19601 if ((lo_mask
& ~1) != 0)
19608 if (push
&& (mask
& (1 << LR_REGNUM
)))
19610 /* Catch pushing the LR. */
19614 asm_fprintf (f
, "%r", LR_REGNUM
);
19618 else if (!push
&& (mask
& (1 << PC_REGNUM
)))
19620 /* Catch popping the PC. */
19621 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
19622 || crtl
->calls_eh_return
)
19624 /* The PC is never poped directly, instead
19625 it is popped into r3 and then BX is used. */
19626 fprintf (f
, "}\n");
19628 thumb_exit (f
, -1);
19637 asm_fprintf (f
, "%r", PC_REGNUM
);
19641 fprintf (f
, "}\n");
19643 if (push
&& pushed_words
&& dwarf2out_do_frame ())
19645 char *l
= dwarf2out_cfi_label (false);
19646 int pushed_mask
= real_regs
;
19648 *cfa_offset
+= pushed_words
* 4;
19649 dwarf2out_def_cfa (l
, SP_REGNUM
, *cfa_offset
);
19652 pushed_mask
= real_regs
;
19653 for (regno
= 0; regno
<= 14; regno
++, pushed_mask
>>= 1)
19655 if (pushed_mask
& 1)
19656 dwarf2out_reg_save (l
, regno
, 4 * pushed_words
++ - *cfa_offset
);
19661 /* Generate code to return from a thumb function.
19662 If 'reg_containing_return_addr' is -1, then the return address is
19663 actually on the stack, at the stack pointer. */
19665 thumb_exit (FILE *f
, int reg_containing_return_addr
)
19667 unsigned regs_available_for_popping
;
19668 unsigned regs_to_pop
;
19670 unsigned available
;
19674 int restore_a4
= FALSE
;
19676 /* Compute the registers we need to pop. */
19680 if (reg_containing_return_addr
== -1)
19682 regs_to_pop
|= 1 << LR_REGNUM
;
19686 if (TARGET_BACKTRACE
)
19688 /* Restore the (ARM) frame pointer and stack pointer. */
19689 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
19693 /* If there is nothing to pop then just emit the BX instruction and
19695 if (pops_needed
== 0)
19697 if (crtl
->calls_eh_return
)
19698 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
19700 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
19703 /* Otherwise if we are not supporting interworking and we have not created
19704 a backtrace structure and the function was not entered in ARM mode then
19705 just pop the return address straight into the PC. */
19706 else if (!TARGET_INTERWORK
19707 && !TARGET_BACKTRACE
19708 && !is_called_in_ARM_mode (current_function_decl
)
19709 && !crtl
->calls_eh_return
)
19711 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
19715 /* Find out how many of the (return) argument registers we can corrupt. */
19716 regs_available_for_popping
= 0;
19718 /* If returning via __builtin_eh_return, the bottom three registers
19719 all contain information needed for the return. */
19720 if (crtl
->calls_eh_return
)
19724 /* If we can deduce the registers used from the function's
19725 return value. This is more reliable that examining
19726 df_regs_ever_live_p () because that will be set if the register is
19727 ever used in the function, not just if the register is used
19728 to hold a return value. */
19730 if (crtl
->return_rtx
!= 0)
19731 mode
= GET_MODE (crtl
->return_rtx
);
19733 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
19735 size
= GET_MODE_SIZE (mode
);
19739 /* In a void function we can use any argument register.
19740 In a function that returns a structure on the stack
19741 we can use the second and third argument registers. */
19742 if (mode
== VOIDmode
)
19743 regs_available_for_popping
=
19744 (1 << ARG_REGISTER (1))
19745 | (1 << ARG_REGISTER (2))
19746 | (1 << ARG_REGISTER (3));
19748 regs_available_for_popping
=
19749 (1 << ARG_REGISTER (2))
19750 | (1 << ARG_REGISTER (3));
19752 else if (size
<= 4)
19753 regs_available_for_popping
=
19754 (1 << ARG_REGISTER (2))
19755 | (1 << ARG_REGISTER (3));
19756 else if (size
<= 8)
19757 regs_available_for_popping
=
19758 (1 << ARG_REGISTER (3));
19761 /* Match registers to be popped with registers into which we pop them. */
19762 for (available
= regs_available_for_popping
,
19763 required
= regs_to_pop
;
19764 required
!= 0 && available
!= 0;
19765 available
&= ~(available
& - available
),
19766 required
&= ~(required
& - required
))
19769 /* If we have any popping registers left over, remove them. */
19771 regs_available_for_popping
&= ~available
;
19773 /* Otherwise if we need another popping register we can use
19774 the fourth argument register. */
19775 else if (pops_needed
)
19777 /* If we have not found any free argument registers and
19778 reg a4 contains the return address, we must move it. */
19779 if (regs_available_for_popping
== 0
19780 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
19782 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
19783 reg_containing_return_addr
= LR_REGNUM
;
19785 else if (size
> 12)
19787 /* Register a4 is being used to hold part of the return value,
19788 but we have dire need of a free, low register. */
19791 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
19794 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
19796 /* The fourth argument register is available. */
19797 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
19803 /* Pop as many registers as we can. */
19804 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
19805 regs_available_for_popping
);
19807 /* Process the registers we popped. */
19808 if (reg_containing_return_addr
== -1)
19810 /* The return address was popped into the lowest numbered register. */
19811 regs_to_pop
&= ~(1 << LR_REGNUM
);
19813 reg_containing_return_addr
=
19814 number_of_first_bit_set (regs_available_for_popping
);
19816 /* Remove this register for the mask of available registers, so that
19817 the return address will not be corrupted by further pops. */
19818 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
19821 /* If we popped other registers then handle them here. */
19822 if (regs_available_for_popping
)
19826 /* Work out which register currently contains the frame pointer. */
19827 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
19829 /* Move it into the correct place. */
19830 asm_fprintf (f
, "\tmov\t%r, %r\n",
19831 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
19833 /* (Temporarily) remove it from the mask of popped registers. */
19834 regs_available_for_popping
&= ~(1 << frame_pointer
);
19835 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
19837 if (regs_available_for_popping
)
19841 /* We popped the stack pointer as well,
19842 find the register that contains it. */
19843 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
19845 /* Move it into the stack register. */
19846 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
19848 /* At this point we have popped all necessary registers, so
19849 do not worry about restoring regs_available_for_popping
19850 to its correct value:
19852 assert (pops_needed == 0)
19853 assert (regs_available_for_popping == (1 << frame_pointer))
19854 assert (regs_to_pop == (1 << STACK_POINTER)) */
19858 /* Since we have just move the popped value into the frame
19859 pointer, the popping register is available for reuse, and
19860 we know that we still have the stack pointer left to pop. */
19861 regs_available_for_popping
|= (1 << frame_pointer
);
19865 /* If we still have registers left on the stack, but we no longer have
19866 any registers into which we can pop them, then we must move the return
19867 address into the link register and make available the register that
19869 if (regs_available_for_popping
== 0 && pops_needed
> 0)
19871 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
19873 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
19874 reg_containing_return_addr
);
19876 reg_containing_return_addr
= LR_REGNUM
;
19879 /* If we have registers left on the stack then pop some more.
19880 We know that at most we will want to pop FP and SP. */
19881 if (pops_needed
> 0)
19886 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
19887 regs_available_for_popping
);
19889 /* We have popped either FP or SP.
19890 Move whichever one it is into the correct register. */
19891 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
19892 move_to
= number_of_first_bit_set (regs_to_pop
);
19894 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
19896 regs_to_pop
&= ~(1 << move_to
);
19901 /* If we still have not popped everything then we must have only
19902 had one register available to us and we are now popping the SP. */
19903 if (pops_needed
> 0)
19907 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
19908 regs_available_for_popping
);
19910 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
19912 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
19914 assert (regs_to_pop == (1 << STACK_POINTER))
19915 assert (pops_needed == 1)
19919 /* If necessary restore the a4 register. */
19922 if (reg_containing_return_addr
!= LR_REGNUM
)
19924 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
19925 reg_containing_return_addr
= LR_REGNUM
;
19928 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
19931 if (crtl
->calls_eh_return
)
19932 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
19934 /* Return to caller. */
19935 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
19938 /* Scan INSN just before assembler is output for it.
19939 For Thumb-1, we track the status of the condition codes; this
19940 information is used in the cbranchsi4_insn pattern. */
19942 thumb1_final_prescan_insn (rtx insn
)
19944 if (flag_print_asm_name
)
19945 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
19946 INSN_ADDRESSES (INSN_UID (insn
)));
19947 /* Don't overwrite the previous setter when we get to a cbranch. */
19948 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
19950 enum attr_conds conds
;
19952 if (cfun
->machine
->thumb1_cc_insn
)
19954 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
19955 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
19958 conds
= get_attr_conds (insn
);
19959 if (conds
== CONDS_SET
)
19961 rtx set
= single_set (insn
);
19962 cfun
->machine
->thumb1_cc_insn
= insn
;
19963 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
19964 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
19965 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
19966 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
19968 rtx src1
= XEXP (SET_SRC (set
), 1);
19969 if (src1
== const0_rtx
)
19970 cfun
->machine
->thumb1_cc_mode
= CCmode
;
19973 else if (conds
!= CONDS_NOCOND
)
19974 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
19979 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
19981 unsigned HOST_WIDE_INT mask
= 0xff;
19984 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
19985 if (val
== 0) /* XXX */
19988 for (i
= 0; i
< 25; i
++)
19989 if ((val
& (mask
<< i
)) == val
)
19995 /* Returns nonzero if the current function contains,
19996 or might contain a far jump. */
19998 thumb_far_jump_used_p (void)
20002 /* This test is only important for leaf functions. */
20003 /* assert (!leaf_function_p ()); */
20005 /* If we have already decided that far jumps may be used,
20006 do not bother checking again, and always return true even if
20007 it turns out that they are not being used. Once we have made
20008 the decision that far jumps are present (and that hence the link
20009 register will be pushed onto the stack) we cannot go back on it. */
20010 if (cfun
->machine
->far_jump_used
)
20013 /* If this function is not being called from the prologue/epilogue
20014 generation code then it must be being called from the
20015 INITIAL_ELIMINATION_OFFSET macro. */
20016 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
20018 /* In this case we know that we are being asked about the elimination
20019 of the arg pointer register. If that register is not being used,
20020 then there are no arguments on the stack, and we do not have to
20021 worry that a far jump might force the prologue to push the link
20022 register, changing the stack offsets. In this case we can just
20023 return false, since the presence of far jumps in the function will
20024 not affect stack offsets.
20026 If the arg pointer is live (or if it was live, but has now been
20027 eliminated and so set to dead) then we do have to test to see if
20028 the function might contain a far jump. This test can lead to some
20029 false negatives, since before reload is completed, then length of
20030 branch instructions is not known, so gcc defaults to returning their
20031 longest length, which in turn sets the far jump attribute to true.
20033 A false negative will not result in bad code being generated, but it
20034 will result in a needless push and pop of the link register. We
20035 hope that this does not occur too often.
20037 If we need doubleword stack alignment this could affect the other
20038 elimination offsets so we can't risk getting it wrong. */
20039 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
20040 cfun
->machine
->arg_pointer_live
= 1;
20041 else if (!cfun
->machine
->arg_pointer_live
)
20045 /* Check to see if the function contains a branch
20046 insn with the far jump attribute set. */
20047 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20049 if (GET_CODE (insn
) == JUMP_INSN
20050 /* Ignore tablejump patterns. */
20051 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20052 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
20053 && get_attr_far_jump (insn
) == FAR_JUMP_YES
20056 /* Record the fact that we have decided that
20057 the function does use far jumps. */
20058 cfun
->machine
->far_jump_used
= 1;
20066 /* Return nonzero if FUNC must be entered in ARM mode. */
20068 is_called_in_ARM_mode (tree func
)
20070 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
20072 /* Ignore the problem about functions whose address is taken. */
20073 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
20077 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
20083 /* Given the stack offsets and register mask in OFFSETS, decide how
20084 many additional registers to push instead of subtracting a constant
20085 from SP. For epilogues the principle is the same except we use pop.
20086 FOR_PROLOGUE indicates which we're generating. */
20088 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
20090 HOST_WIDE_INT amount
;
20091 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
20092 /* Extract a mask of the ones we can give to the Thumb's push/pop
20094 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
20095 /* Then count how many other high registers will need to be pushed. */
20096 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20097 int n_free
, reg_base
;
20099 if (!for_prologue
&& frame_pointer_needed
)
20100 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20102 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20104 /* If the stack frame size is 512 exactly, we can save one load
20105 instruction, which should make this a win even when optimizing
20107 if (!optimize_size
&& amount
!= 512)
20110 /* Can't do this if there are high registers to push. */
20111 if (high_regs_pushed
!= 0)
20114 /* Shouldn't do it in the prologue if no registers would normally
20115 be pushed at all. In the epilogue, also allow it if we'll have
20116 a pop insn for the PC. */
20119 || TARGET_BACKTRACE
20120 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
20121 || TARGET_INTERWORK
20122 || crtl
->args
.pretend_args_size
!= 0))
20125 /* Don't do this if thumb_expand_prologue wants to emit instructions
20126 between the push and the stack frame allocation. */
20128 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20129 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
20136 reg_base
= arm_size_return_regs () / UNITS_PER_WORD
;
20137 live_regs_mask
>>= reg_base
;
20140 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
20141 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
20143 live_regs_mask
>>= 1;
20149 gcc_assert (amount
/ 4 * 4 == amount
);
20151 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
20152 return (amount
- 508) / 4;
20153 if (amount
<= n_free
* 4)
20158 /* The bits which aren't usefully expanded as rtl. */
20160 thumb_unexpanded_epilogue (void)
20162 arm_stack_offsets
*offsets
;
20164 unsigned long live_regs_mask
= 0;
20165 int high_regs_pushed
= 0;
20167 int had_to_push_lr
;
20170 if (cfun
->machine
->return_used_this_function
!= 0)
20173 if (IS_NAKED (arm_current_func_type ()))
20176 offsets
= arm_get_frame_offsets ();
20177 live_regs_mask
= offsets
->saved_regs_mask
;
20178 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20180 /* If we can deduce the registers used from the function's return value.
20181 This is more reliable that examining df_regs_ever_live_p () because that
20182 will be set if the register is ever used in the function, not just if
20183 the register is used to hold a return value. */
20184 size
= arm_size_return_regs ();
20186 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
20189 unsigned long extra_mask
= (1 << extra_pop
) - 1;
20190 live_regs_mask
|= extra_mask
<< (size
/ UNITS_PER_WORD
);
20193 /* The prolog may have pushed some high registers to use as
20194 work registers. e.g. the testsuite file:
20195 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20196 compiles to produce:
20197 push {r4, r5, r6, r7, lr}
20201 as part of the prolog. We have to undo that pushing here. */
20203 if (high_regs_pushed
)
20205 unsigned long mask
= live_regs_mask
& 0xff;
20208 /* The available low registers depend on the size of the value we are
20216 /* Oh dear! We have no low registers into which we can pop
20219 ("no low registers available for popping high registers");
20221 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
20222 if (live_regs_mask
& (1 << next_hi_reg
))
20225 while (high_regs_pushed
)
20227 /* Find lo register(s) into which the high register(s) can
20229 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20231 if (mask
& (1 << regno
))
20232 high_regs_pushed
--;
20233 if (high_regs_pushed
== 0)
20237 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
20239 /* Pop the values into the low register(s). */
20240 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
20242 /* Move the value(s) into the high registers. */
20243 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20245 if (mask
& (1 << regno
))
20247 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
20250 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
20251 if (live_regs_mask
& (1 << next_hi_reg
))
20256 live_regs_mask
&= ~0x0f00;
20259 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
20260 live_regs_mask
&= 0xff;
20262 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
20264 /* Pop the return address into the PC. */
20265 if (had_to_push_lr
)
20266 live_regs_mask
|= 1 << PC_REGNUM
;
20268 /* Either no argument registers were pushed or a backtrace
20269 structure was created which includes an adjusted stack
20270 pointer, so just pop everything. */
20271 if (live_regs_mask
)
20272 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20275 /* We have either just popped the return address into the
20276 PC or it is was kept in LR for the entire function.
20277 Note that thumb_pushpop has already called thumb_exit if the
20278 PC was in the list. */
20279 if (!had_to_push_lr
)
20280 thumb_exit (asm_out_file
, LR_REGNUM
);
20284 /* Pop everything but the return address. */
20285 if (live_regs_mask
)
20286 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20289 if (had_to_push_lr
)
20293 /* We have no free low regs, so save one. */
20294 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
20298 /* Get the return address into a temporary register. */
20299 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
20300 1 << LAST_ARG_REGNUM
);
20304 /* Move the return address to lr. */
20305 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
20307 /* Restore the low register. */
20308 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
20313 regno
= LAST_ARG_REGNUM
;
20318 /* Remove the argument registers that were pushed onto the stack. */
20319 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
20320 SP_REGNUM
, SP_REGNUM
,
20321 crtl
->args
.pretend_args_size
);
20323 thumb_exit (asm_out_file
, regno
);
20329 /* Functions to save and restore machine-specific function data. */
20330 static struct machine_function
*
20331 arm_init_machine_status (void)
20333 struct machine_function
*machine
;
20334 machine
= ggc_alloc_cleared_machine_function ();
20336 #if ARM_FT_UNKNOWN != 0
20337 machine
->func_type
= ARM_FT_UNKNOWN
;
20342 /* Return an RTX indicating where the return address to the
20343 calling function can be found. */
20345 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
20350 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
20353 /* Do anything needed before RTL is emitted for each function. */
20355 arm_init_expanders (void)
20357 /* Arrange to initialize and mark the machine per-function status. */
20358 init_machine_status
= arm_init_machine_status
;
20360 /* This is to stop the combine pass optimizing away the alignment
20361 adjustment of va_arg. */
20362 /* ??? It is claimed that this should not be necessary. */
20364 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
20368 /* Like arm_compute_initial_elimination offset. Simpler because there
20369 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20370 to point at the base of the local variables after static stack
20371 space for a function has been allocated. */
20374 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20376 arm_stack_offsets
*offsets
;
20378 offsets
= arm_get_frame_offsets ();
20382 case ARG_POINTER_REGNUM
:
20385 case STACK_POINTER_REGNUM
:
20386 return offsets
->outgoing_args
- offsets
->saved_args
;
20388 case FRAME_POINTER_REGNUM
:
20389 return offsets
->soft_frame
- offsets
->saved_args
;
20391 case ARM_HARD_FRAME_POINTER_REGNUM
:
20392 return offsets
->saved_regs
- offsets
->saved_args
;
20394 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20395 return offsets
->locals_base
- offsets
->saved_args
;
20398 gcc_unreachable ();
20402 case FRAME_POINTER_REGNUM
:
20405 case STACK_POINTER_REGNUM
:
20406 return offsets
->outgoing_args
- offsets
->soft_frame
;
20408 case ARM_HARD_FRAME_POINTER_REGNUM
:
20409 return offsets
->saved_regs
- offsets
->soft_frame
;
20411 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20412 return offsets
->locals_base
- offsets
->soft_frame
;
20415 gcc_unreachable ();
20420 gcc_unreachable ();
20424 /* Generate the rest of a function's prologue. */
20426 thumb1_expand_prologue (void)
20430 HOST_WIDE_INT amount
;
20431 arm_stack_offsets
*offsets
;
20432 unsigned long func_type
;
20434 unsigned long live_regs_mask
;
20436 func_type
= arm_current_func_type ();
20438 /* Naked functions don't have prologues. */
20439 if (IS_NAKED (func_type
))
20442 if (IS_INTERRUPT (func_type
))
20444 error ("interrupt Service Routines cannot be coded in Thumb mode");
20448 offsets
= arm_get_frame_offsets ();
20449 live_regs_mask
= offsets
->saved_regs_mask
;
20450 /* Load the pic register before setting the frame pointer,
20451 so we can use r7 as a temporary work register. */
20452 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20453 arm_load_pic_register (live_regs_mask
);
20455 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
20456 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
20457 stack_pointer_rtx
);
20459 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20460 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
20465 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20466 GEN_INT (- amount
)));
20467 RTX_FRAME_RELATED_P (insn
) = 1;
20473 /* The stack decrement is too big for an immediate value in a single
20474 insn. In theory we could issue multiple subtracts, but after
20475 three of them it becomes more space efficient to place the full
20476 value in the constant pool and load into a register. (Also the
20477 ARM debugger really likes to see only one stack decrement per
20478 function). So instead we look for a scratch register into which
20479 we can load the decrement, and then we subtract this from the
20480 stack pointer. Unfortunately on the thumb the only available
20481 scratch registers are the argument registers, and we cannot use
20482 these as they may hold arguments to the function. Instead we
20483 attempt to locate a call preserved register which is used by this
20484 function. If we can find one, then we know that it will have
20485 been pushed at the start of the prologue and so we can corrupt
20487 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
20488 if (live_regs_mask
& (1 << regno
))
20491 gcc_assert(regno
<= LAST_LO_REGNUM
);
20493 reg
= gen_rtx_REG (SImode
, regno
);
20495 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
20497 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
20498 stack_pointer_rtx
, reg
));
20499 RTX_FRAME_RELATED_P (insn
) = 1;
20500 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20501 plus_constant (stack_pointer_rtx
,
20503 RTX_FRAME_RELATED_P (dwarf
) = 1;
20504 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20508 if (frame_pointer_needed
)
20509 thumb_set_frame_pointer (offsets
);
20511 /* If we are profiling, make sure no instructions are scheduled before
20512 the call to mcount. Similarly if the user has requested no
20513 scheduling in the prolog. Similarly if we want non-call exceptions
20514 using the EABI unwinder, to prevent faulting instructions from being
20515 swapped with a stack adjustment. */
20516 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20517 || (ARM_EABI_UNWIND_TABLES
&& cfun
->can_throw_non_call_exceptions
))
20518 emit_insn (gen_blockage ());
20520 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
20521 if (live_regs_mask
& 0xff)
20522 cfun
->machine
->lr_save_eliminated
= 0;
20527 thumb1_expand_epilogue (void)
20529 HOST_WIDE_INT amount
;
20530 arm_stack_offsets
*offsets
;
20533 /* Naked functions don't have prologues. */
20534 if (IS_NAKED (arm_current_func_type ()))
20537 offsets
= arm_get_frame_offsets ();
20538 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20540 if (frame_pointer_needed
)
20542 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
20543 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20545 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
20547 gcc_assert (amount
>= 0);
20551 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20552 GEN_INT (amount
)));
20555 /* r3 is always free in the epilogue. */
20556 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
20558 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
20559 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
20563 /* Emit a USE (stack_pointer_rtx), so that
20564 the stack adjustment will not be deleted. */
20565 emit_insn (gen_prologue_use (stack_pointer_rtx
));
20567 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
20568 emit_insn (gen_blockage ());
20570 /* Emit a clobber for each insn that will be restored in the epilogue,
20571 so that flow2 will get register lifetimes correct. */
20572 for (regno
= 0; regno
< 13; regno
++)
20573 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
20574 emit_clobber (gen_rtx_REG (SImode
, regno
));
20576 if (! df_regs_ever_live_p (LR_REGNUM
))
20577 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
20581 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
20583 arm_stack_offsets
*offsets
;
20584 unsigned long live_regs_mask
= 0;
20585 unsigned long l_mask
;
20586 unsigned high_regs_pushed
= 0;
20587 int cfa_offset
= 0;
20590 if (IS_NAKED (arm_current_func_type ()))
20593 if (is_called_in_ARM_mode (current_function_decl
))
20597 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
20598 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
20600 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
20602 /* Generate code sequence to switch us into Thumb mode. */
20603 /* The .code 32 directive has already been emitted by
20604 ASM_DECLARE_FUNCTION_NAME. */
20605 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
20606 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
20608 /* Generate a label, so that the debugger will notice the
20609 change in instruction sets. This label is also used by
20610 the assembler to bypass the ARM code when this function
20611 is called from a Thumb encoded function elsewhere in the
20612 same file. Hence the definition of STUB_NAME here must
20613 agree with the definition in gas/config/tc-arm.c. */
20615 #define STUB_NAME ".real_start_of"
20617 fprintf (f
, "\t.code\t16\n");
20619 if (arm_dllexport_name_p (name
))
20620 name
= arm_strip_name_encoding (name
);
20622 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
20623 fprintf (f
, "\t.thumb_func\n");
20624 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
20627 if (crtl
->args
.pretend_args_size
)
20629 /* Output unwind directive for the stack adjustment. */
20630 if (ARM_EABI_UNWIND_TABLES
)
20631 fprintf (f
, "\t.pad #%d\n",
20632 crtl
->args
.pretend_args_size
);
20634 if (cfun
->machine
->uses_anonymous_args
)
20638 fprintf (f
, "\tpush\t{");
20640 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
20642 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
20643 regno
<= LAST_ARG_REGNUM
;
20645 asm_fprintf (f
, "%r%s", regno
,
20646 regno
== LAST_ARG_REGNUM
? "" : ", ");
20648 fprintf (f
, "}\n");
20651 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
20652 SP_REGNUM
, SP_REGNUM
,
20653 crtl
->args
.pretend_args_size
);
20655 /* We don't need to record the stores for unwinding (would it
20656 help the debugger any if we did?), but record the change in
20657 the stack pointer. */
20658 if (dwarf2out_do_frame ())
20660 char *l
= dwarf2out_cfi_label (false);
20662 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
20663 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
20667 /* Get the registers we are going to push. */
20668 offsets
= arm_get_frame_offsets ();
20669 live_regs_mask
= offsets
->saved_regs_mask
;
20670 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20671 l_mask
= live_regs_mask
& 0x40ff;
20672 /* Then count how many other high registers will need to be pushed. */
20673 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20675 if (TARGET_BACKTRACE
)
20678 unsigned work_register
;
20680 /* We have been asked to create a stack backtrace structure.
20681 The code looks like this:
20685 0 sub SP, #16 Reserve space for 4 registers.
20686 2 push {R7} Push low registers.
20687 4 add R7, SP, #20 Get the stack pointer before the push.
20688 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20689 8 mov R7, PC Get hold of the start of this code plus 12.
20690 10 str R7, [SP, #16] Store it.
20691 12 mov R7, FP Get hold of the current frame pointer.
20692 14 str R7, [SP, #4] Store it.
20693 16 mov R7, LR Get hold of the current return address.
20694 18 str R7, [SP, #12] Store it.
20695 20 add R7, SP, #16 Point at the start of the backtrace structure.
20696 22 mov FP, R7 Put this value into the frame pointer. */
20698 work_register
= thumb_find_work_register (live_regs_mask
);
20700 if (ARM_EABI_UNWIND_TABLES
)
20701 asm_fprintf (f
, "\t.pad #16\n");
20704 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20705 SP_REGNUM
, SP_REGNUM
);
20707 if (dwarf2out_do_frame ())
20709 char *l
= dwarf2out_cfi_label (false);
20711 cfa_offset
= cfa_offset
+ 16;
20712 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
20717 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
20718 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
20723 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
20724 offset
+ 16 + crtl
->args
.pretend_args_size
);
20726 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20729 /* Make sure that the instruction fetching the PC is in the right place
20730 to calculate "start of backtrace creation code + 12". */
20733 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
20734 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20736 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
20737 ARM_HARD_FRAME_POINTER_REGNUM
);
20738 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20743 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
20744 ARM_HARD_FRAME_POINTER_REGNUM
);
20745 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20747 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
20748 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20752 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
20753 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20755 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
20757 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
20758 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
20760 /* Optimization: If we are not pushing any low registers but we are going
20761 to push some high registers then delay our first push. This will just
20762 be a push of LR and we can combine it with the push of the first high
20764 else if ((l_mask
& 0xff) != 0
20765 || (high_regs_pushed
== 0 && l_mask
))
20767 unsigned long mask
= l_mask
;
20768 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
20769 thumb_pushpop (f
, mask
, 1, &cfa_offset
, mask
);
20772 if (high_regs_pushed
)
20774 unsigned pushable_regs
;
20775 unsigned next_hi_reg
;
20777 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
20778 if (live_regs_mask
& (1 << next_hi_reg
))
20781 pushable_regs
= l_mask
& 0xff;
20783 if (pushable_regs
== 0)
20784 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
20786 while (high_regs_pushed
> 0)
20788 unsigned long real_regs_mask
= 0;
20790 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
20792 if (pushable_regs
& (1 << regno
))
20794 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
20796 high_regs_pushed
--;
20797 real_regs_mask
|= (1 << next_hi_reg
);
20799 if (high_regs_pushed
)
20801 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
20803 if (live_regs_mask
& (1 << next_hi_reg
))
20808 pushable_regs
&= ~((1 << regno
) - 1);
20814 /* If we had to find a work register and we have not yet
20815 saved the LR then add it to the list of regs to push. */
20816 if (l_mask
== (1 << LR_REGNUM
))
20818 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
20820 real_regs_mask
| (1 << LR_REGNUM
));
20824 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
20829 /* Handle the case of a double word load into a low register from
20830 a computed memory address. The computed address may involve a
20831 register which is overwritten by the load. */
20833 thumb_load_double_from_address (rtx
*operands
)
20841 gcc_assert (GET_CODE (operands
[0]) == REG
);
20842 gcc_assert (GET_CODE (operands
[1]) == MEM
);
20844 /* Get the memory address. */
20845 addr
= XEXP (operands
[1], 0);
20847 /* Work out how the memory address is computed. */
20848 switch (GET_CODE (addr
))
20851 operands
[2] = adjust_address (operands
[1], SImode
, 4);
20853 if (REGNO (operands
[0]) == REGNO (addr
))
20855 output_asm_insn ("ldr\t%H0, %2", operands
);
20856 output_asm_insn ("ldr\t%0, %1", operands
);
20860 output_asm_insn ("ldr\t%0, %1", operands
);
20861 output_asm_insn ("ldr\t%H0, %2", operands
);
20866 /* Compute <address> + 4 for the high order load. */
20867 operands
[2] = adjust_address (operands
[1], SImode
, 4);
20869 output_asm_insn ("ldr\t%0, %1", operands
);
20870 output_asm_insn ("ldr\t%H0, %2", operands
);
20874 arg1
= XEXP (addr
, 0);
20875 arg2
= XEXP (addr
, 1);
20877 if (CONSTANT_P (arg1
))
20878 base
= arg2
, offset
= arg1
;
20880 base
= arg1
, offset
= arg2
;
20882 gcc_assert (GET_CODE (base
) == REG
);
20884 /* Catch the case of <address> = <reg> + <reg> */
20885 if (GET_CODE (offset
) == REG
)
20887 int reg_offset
= REGNO (offset
);
20888 int reg_base
= REGNO (base
);
20889 int reg_dest
= REGNO (operands
[0]);
20891 /* Add the base and offset registers together into the
20892 higher destination register. */
20893 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
20894 reg_dest
+ 1, reg_base
, reg_offset
);
20896 /* Load the lower destination register from the address in
20897 the higher destination register. */
20898 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
20899 reg_dest
, reg_dest
+ 1);
20901 /* Load the higher destination register from its own address
20903 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
20904 reg_dest
+ 1, reg_dest
+ 1);
20908 /* Compute <address> + 4 for the high order load. */
20909 operands
[2] = adjust_address (operands
[1], SImode
, 4);
20911 /* If the computed address is held in the low order register
20912 then load the high order register first, otherwise always
20913 load the low order register first. */
20914 if (REGNO (operands
[0]) == REGNO (base
))
20916 output_asm_insn ("ldr\t%H0, %2", operands
);
20917 output_asm_insn ("ldr\t%0, %1", operands
);
20921 output_asm_insn ("ldr\t%0, %1", operands
);
20922 output_asm_insn ("ldr\t%H0, %2", operands
);
20928 /* With no registers to worry about we can just load the value
20930 operands
[2] = adjust_address (operands
[1], SImode
, 4);
20932 output_asm_insn ("ldr\t%H0, %2", operands
);
20933 output_asm_insn ("ldr\t%0, %1", operands
);
20937 gcc_unreachable ();
20944 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
20951 if (REGNO (operands
[4]) > REGNO (operands
[5]))
20954 operands
[4] = operands
[5];
20957 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
20958 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
20962 if (REGNO (operands
[4]) > REGNO (operands
[5]))
20965 operands
[4] = operands
[5];
20968 if (REGNO (operands
[5]) > REGNO (operands
[6]))
20971 operands
[5] = operands
[6];
20974 if (REGNO (operands
[4]) > REGNO (operands
[5]))
20977 operands
[4] = operands
[5];
20981 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
20982 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
20986 gcc_unreachable ();
20992 /* Output a call-via instruction for thumb state. */
20994 thumb_call_via_reg (rtx reg
)
20996 int regno
= REGNO (reg
);
20999 gcc_assert (regno
< LR_REGNUM
);
21001 /* If we are in the normal text section we can use a single instance
21002 per compilation unit. If we are doing function sections, then we need
21003 an entry per section, since we can't rely on reachability. */
21004 if (in_section
== text_section
)
21006 thumb_call_reg_needed
= 1;
21008 if (thumb_call_via_label
[regno
] == NULL
)
21009 thumb_call_via_label
[regno
] = gen_label_rtx ();
21010 labelp
= thumb_call_via_label
+ regno
;
21014 if (cfun
->machine
->call_via
[regno
] == NULL
)
21015 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
21016 labelp
= cfun
->machine
->call_via
+ regno
;
21019 output_asm_insn ("bl\t%a0", labelp
);
21023 /* Routines for generating rtl. */
21025 thumb_expand_movmemqi (rtx
*operands
)
21027 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
21028 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
21029 HOST_WIDE_INT len
= INTVAL (operands
[2]);
21030 HOST_WIDE_INT offset
= 0;
21034 emit_insn (gen_movmem12b (out
, in
, out
, in
));
21040 emit_insn (gen_movmem8b (out
, in
, out
, in
));
21046 rtx reg
= gen_reg_rtx (SImode
);
21047 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
21048 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
21055 rtx reg
= gen_reg_rtx (HImode
);
21056 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
21057 plus_constant (in
, offset
))));
21058 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (out
, offset
)),
21066 rtx reg
= gen_reg_rtx (QImode
);
21067 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
21068 plus_constant (in
, offset
))));
21069 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (out
, offset
)),
21075 thumb_reload_out_hi (rtx
*operands
)
21077 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
21080 /* Handle reading a half-word from memory during reload. */
21082 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
21084 gcc_unreachable ();
21087 /* Return the length of a function name prefix
21088 that starts with the character 'c'. */
21090 arm_get_strip_length (int c
)
21094 ARM_NAME_ENCODING_LENGTHS
21099 /* Return a pointer to a function's name with any
21100 and all prefix encodings stripped from it. */
21102 arm_strip_name_encoding (const char *name
)
21106 while ((skip
= arm_get_strip_length (* name
)))
21112 /* If there is a '*' anywhere in the name's prefix, then
21113 emit the stripped name verbatim, otherwise prepend an
21114 underscore if leading underscores are being used. */
21116 arm_asm_output_labelref (FILE *stream
, const char *name
)
21121 while ((skip
= arm_get_strip_length (* name
)))
21123 verbatim
|= (*name
== '*');
21128 fputs (name
, stream
);
21130 asm_fprintf (stream
, "%U%s", name
);
21134 arm_file_start (void)
21138 if (TARGET_UNIFIED_ASM
)
21139 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
21143 const char *fpu_name
;
21144 if (arm_selected_arch
)
21145 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
21147 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
21149 if (TARGET_SOFT_FLOAT
)
21152 fpu_name
= "softvfp";
21154 fpu_name
= "softfpa";
21158 fpu_name
= arm_fpu_desc
->name
;
21159 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
21161 if (TARGET_HARD_FLOAT
)
21162 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
21163 if (TARGET_HARD_FLOAT_ABI
)
21164 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
21167 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
21169 /* Some of these attributes only apply when the corresponding features
21170 are used. However we don't have any easy way of figuring this out.
21171 Conservatively record the setting that would have been used. */
21173 /* Tag_ABI_FP_rounding. */
21174 if (flag_rounding_math
)
21175 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
21176 if (!flag_unsafe_math_optimizations
)
21178 /* Tag_ABI_FP_denomal. */
21179 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
21180 /* Tag_ABI_FP_exceptions. */
21181 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
21183 /* Tag_ABI_FP_user_exceptions. */
21184 if (flag_signaling_nans
)
21185 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
21186 /* Tag_ABI_FP_number_model. */
21187 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
21188 flag_finite_math_only
? 1 : 3);
21190 /* Tag_ABI_align8_needed. */
21191 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
21192 /* Tag_ABI_align8_preserved. */
21193 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
21194 /* Tag_ABI_enum_size. */
21195 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
21196 flag_short_enums
? 1 : 2);
21198 /* Tag_ABI_optimization_goals. */
21201 else if (optimize
>= 2)
21207 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
21209 /* Tag_ABI_FP_16bit_format. */
21210 if (arm_fp16_format
)
21211 asm_fprintf (asm_out_file
, "\t.eabi_attribute 38, %d\n",
21212 (int)arm_fp16_format
);
21214 if (arm_lang_output_object_attributes_hook
)
21215 arm_lang_output_object_attributes_hook();
21217 default_file_start();
21221 arm_file_end (void)
21225 if (NEED_INDICATE_EXEC_STACK
)
21226 /* Add .note.GNU-stack. */
21227 file_end_indicate_exec_stack ();
21229 if (! thumb_call_reg_needed
)
21232 switch_to_section (text_section
);
21233 asm_fprintf (asm_out_file
, "\t.code 16\n");
21234 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
21236 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
21238 rtx label
= thumb_call_via_label
[regno
];
21242 targetm
.asm_out
.internal_label (asm_out_file
, "L",
21243 CODE_LABEL_NUMBER (label
));
21244 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
21250 /* Symbols in the text segment can be accessed without indirecting via the
21251 constant pool; it may take an extra binary operation, but this is still
21252 faster than indirecting via memory. Don't do this when not optimizing,
21253 since we won't be calculating al of the offsets necessary to do this
21257 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
21259 if (optimize
> 0 && TREE_CONSTANT (decl
))
21260 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
21262 default_encode_section_info (decl
, rtl
, first
);
21264 #endif /* !ARM_PE */
21267 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
21269 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
21270 && !strcmp (prefix
, "L"))
21272 arm_ccfsm_state
= 0;
21273 arm_target_insn
= NULL
;
21275 default_internal_label (stream
, prefix
, labelno
);
21278 /* Output code to add DELTA to the first argument, and then jump
21279 to FUNCTION. Used for C++ multiple inheritance. */
21281 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
21282 HOST_WIDE_INT delta
,
21283 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
21286 static int thunk_label
= 0;
21289 int mi_delta
= delta
;
21290 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
21292 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
21295 mi_delta
= - mi_delta
;
21299 int labelno
= thunk_label
++;
21300 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
21301 /* Thunks are entered in arm mode when avaiable. */
21302 if (TARGET_THUMB1_ONLY
)
21304 /* push r3 so we can use it as a temporary. */
21305 /* TODO: Omit this save if r3 is not used. */
21306 fputs ("\tpush {r3}\n", file
);
21307 fputs ("\tldr\tr3, ", file
);
21311 fputs ("\tldr\tr12, ", file
);
21313 assemble_name (file
, label
);
21314 fputc ('\n', file
);
21317 /* If we are generating PIC, the ldr instruction below loads
21318 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
21319 the address of the add + 8, so we have:
21321 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
21324 Note that we have "+ 1" because some versions of GNU ld
21325 don't set the low bit of the result for R_ARM_REL32
21326 relocations against thumb function symbols.
21327 On ARMv6M this is +4, not +8. */
21328 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
21329 assemble_name (file
, labelpc
);
21330 fputs (":\n", file
);
21331 if (TARGET_THUMB1_ONLY
)
21333 /* This is 2 insns after the start of the thunk, so we know it
21334 is 4-byte aligned. */
21335 fputs ("\tadd\tr3, pc, r3\n", file
);
21336 fputs ("\tmov r12, r3\n", file
);
21339 fputs ("\tadd\tr12, pc, r12\n", file
);
21341 else if (TARGET_THUMB1_ONLY
)
21342 fputs ("\tmov r12, r3\n", file
);
21344 if (TARGET_THUMB1_ONLY
)
21346 if (mi_delta
> 255)
21348 fputs ("\tldr\tr3, ", file
);
21349 assemble_name (file
, label
);
21350 fputs ("+4\n", file
);
21351 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
21352 mi_op
, this_regno
, this_regno
);
21354 else if (mi_delta
!= 0)
21356 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
21357 mi_op
, this_regno
, this_regno
,
21363 /* TODO: Use movw/movt for large constants when available. */
21364 while (mi_delta
!= 0)
21366 if ((mi_delta
& (3 << shift
)) == 0)
21370 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
21371 mi_op
, this_regno
, this_regno
,
21372 mi_delta
& (0xff << shift
));
21373 mi_delta
&= ~(0xff << shift
);
21380 if (TARGET_THUMB1_ONLY
)
21381 fputs ("\tpop\t{r3}\n", file
);
21383 fprintf (file
, "\tbx\tr12\n");
21384 ASM_OUTPUT_ALIGN (file
, 2);
21385 assemble_name (file
, label
);
21386 fputs (":\n", file
);
21389 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
21390 rtx tem
= XEXP (DECL_RTL (function
), 0);
21391 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
21392 tem
= gen_rtx_MINUS (GET_MODE (tem
),
21394 gen_rtx_SYMBOL_REF (Pmode
,
21395 ggc_strdup (labelpc
)));
21396 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
21399 /* Output ".word .LTHUNKn". */
21400 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
21402 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
21403 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
21407 fputs ("\tb\t", file
);
21408 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
21409 if (NEED_PLT_RELOC
)
21410 fputs ("(PLT)", file
);
21411 fputc ('\n', file
);
21416 arm_emit_vector_const (FILE *file
, rtx x
)
21419 const char * pattern
;
21421 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
21423 switch (GET_MODE (x
))
21425 case V2SImode
: pattern
= "%08x"; break;
21426 case V4HImode
: pattern
= "%04x"; break;
21427 case V8QImode
: pattern
= "%02x"; break;
21428 default: gcc_unreachable ();
21431 fprintf (file
, "0x");
21432 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
21436 element
= CONST_VECTOR_ELT (x
, i
);
21437 fprintf (file
, pattern
, INTVAL (element
));
21443 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
21444 HFmode constant pool entries are actually loaded with ldr. */
21446 arm_emit_fp16_const (rtx c
)
21451 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
21452 bits
= real_to_target (NULL
, &r
, HFmode
);
21453 if (WORDS_BIG_ENDIAN
)
21454 assemble_zeros (2);
21455 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
21456 if (!WORDS_BIG_ENDIAN
)
21457 assemble_zeros (2);
21461 arm_output_load_gr (rtx
*operands
)
21468 if (GET_CODE (operands
[1]) != MEM
21469 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
21470 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
21471 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
21472 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
21473 return "wldrw%?\t%0, %1";
21475 /* Fix up an out-of-range load of a GR register. */
21476 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
21477 wcgr
= operands
[0];
21479 output_asm_insn ("ldr%?\t%0, %1", operands
);
21481 operands
[0] = wcgr
;
21483 output_asm_insn ("tmcr%?\t%0, %1", operands
);
21484 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
21489 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
21491 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
21492 named arg and all anonymous args onto the stack.
21493 XXX I know the prologue shouldn't be pushing registers, but it is faster
21497 arm_setup_incoming_varargs (CUMULATIVE_ARGS
*pcum
,
21498 enum machine_mode mode
,
21501 int second_time ATTRIBUTE_UNUSED
)
21505 cfun
->machine
->uses_anonymous_args
= 1;
21506 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
21508 nregs
= pcum
->aapcs_ncrn
;
21509 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
21513 nregs
= pcum
->nregs
;
21515 if (nregs
< NUM_ARG_REGS
)
21516 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
21519 /* Return nonzero if the CONSUMER instruction (a store) does not need
21520 PRODUCER's value to calculate the address. */
21523 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
21525 rtx value
= PATTERN (producer
);
21526 rtx addr
= PATTERN (consumer
);
21528 if (GET_CODE (value
) == COND_EXEC
)
21529 value
= COND_EXEC_CODE (value
);
21530 if (GET_CODE (value
) == PARALLEL
)
21531 value
= XVECEXP (value
, 0, 0);
21532 value
= XEXP (value
, 0);
21533 if (GET_CODE (addr
) == COND_EXEC
)
21534 addr
= COND_EXEC_CODE (addr
);
21535 if (GET_CODE (addr
) == PARALLEL
)
21536 addr
= XVECEXP (addr
, 0, 0);
21537 addr
= XEXP (addr
, 0);
21539 return !reg_overlap_mentioned_p (value
, addr
);
21542 /* Return nonzero if the CONSUMER instruction (a store) does need
21543 PRODUCER's value to calculate the address. */
21546 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
21548 return !arm_no_early_store_addr_dep (producer
, consumer
);
21551 /* Return nonzero if the CONSUMER instruction (a load) does need
21552 PRODUCER's value to calculate the address. */
21555 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
21557 rtx value
= PATTERN (producer
);
21558 rtx addr
= PATTERN (consumer
);
21560 if (GET_CODE (value
) == COND_EXEC
)
21561 value
= COND_EXEC_CODE (value
);
21562 if (GET_CODE (value
) == PARALLEL
)
21563 value
= XVECEXP (value
, 0, 0);
21564 value
= XEXP (value
, 0);
21565 if (GET_CODE (addr
) == COND_EXEC
)
21566 addr
= COND_EXEC_CODE (addr
);
21567 if (GET_CODE (addr
) == PARALLEL
)
21568 addr
= XVECEXP (addr
, 0, 0);
21569 addr
= XEXP (addr
, 1);
21571 return reg_overlap_mentioned_p (value
, addr
);
21574 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21575 have an early register shift value or amount dependency on the
21576 result of PRODUCER. */
21579 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
21581 rtx value
= PATTERN (producer
);
21582 rtx op
= PATTERN (consumer
);
21585 if (GET_CODE (value
) == COND_EXEC
)
21586 value
= COND_EXEC_CODE (value
);
21587 if (GET_CODE (value
) == PARALLEL
)
21588 value
= XVECEXP (value
, 0, 0);
21589 value
= XEXP (value
, 0);
21590 if (GET_CODE (op
) == COND_EXEC
)
21591 op
= COND_EXEC_CODE (op
);
21592 if (GET_CODE (op
) == PARALLEL
)
21593 op
= XVECEXP (op
, 0, 0);
21596 early_op
= XEXP (op
, 0);
21597 /* This is either an actual independent shift, or a shift applied to
21598 the first operand of another operation. We want the whole shift
21600 if (GET_CODE (early_op
) == REG
)
21603 return !reg_overlap_mentioned_p (value
, early_op
);
21606 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
21607 have an early register shift value dependency on the result of
21611 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
21613 rtx value
= PATTERN (producer
);
21614 rtx op
= PATTERN (consumer
);
21617 if (GET_CODE (value
) == COND_EXEC
)
21618 value
= COND_EXEC_CODE (value
);
21619 if (GET_CODE (value
) == PARALLEL
)
21620 value
= XVECEXP (value
, 0, 0);
21621 value
= XEXP (value
, 0);
21622 if (GET_CODE (op
) == COND_EXEC
)
21623 op
= COND_EXEC_CODE (op
);
21624 if (GET_CODE (op
) == PARALLEL
)
21625 op
= XVECEXP (op
, 0, 0);
21628 early_op
= XEXP (op
, 0);
21630 /* This is either an actual independent shift, or a shift applied to
21631 the first operand of another operation. We want the value being
21632 shifted, in either case. */
21633 if (GET_CODE (early_op
) != REG
)
21634 early_op
= XEXP (early_op
, 0);
21636 return !reg_overlap_mentioned_p (value
, early_op
);
21639 /* Return nonzero if the CONSUMER (a mul or mac op) does not
21640 have an early register mult dependency on the result of
21644 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
21646 rtx value
= PATTERN (producer
);
21647 rtx op
= PATTERN (consumer
);
21649 if (GET_CODE (value
) == COND_EXEC
)
21650 value
= COND_EXEC_CODE (value
);
21651 if (GET_CODE (value
) == PARALLEL
)
21652 value
= XVECEXP (value
, 0, 0);
21653 value
= XEXP (value
, 0);
21654 if (GET_CODE (op
) == COND_EXEC
)
21655 op
= COND_EXEC_CODE (op
);
21656 if (GET_CODE (op
) == PARALLEL
)
21657 op
= XVECEXP (op
, 0, 0);
21660 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
21662 if (GET_CODE (XEXP (op
, 0)) == MULT
)
21663 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
21665 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
21671 /* We can't rely on the caller doing the proper promotion when
21672 using APCS or ATPCS. */
21675 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
21677 return !TARGET_AAPCS_BASED
;
21680 static enum machine_mode
21681 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
21682 enum machine_mode mode
,
21683 int *punsignedp ATTRIBUTE_UNUSED
,
21684 const_tree fntype ATTRIBUTE_UNUSED
,
21685 int for_return ATTRIBUTE_UNUSED
)
21687 if (GET_MODE_CLASS (mode
) == MODE_INT
21688 && GET_MODE_SIZE (mode
) < 4)
21694 /* AAPCS based ABIs use short enums by default. */
21697 arm_default_short_enums (void)
21699 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
21703 /* AAPCS requires that anonymous bitfields affect structure alignment. */
21706 arm_align_anon_bitfield (void)
21708 return TARGET_AAPCS_BASED
;
21712 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
21715 arm_cxx_guard_type (void)
21717 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
21720 /* Return non-zero if the consumer (a multiply-accumulate instruction)
21721 has an accumulator dependency on the result of the producer (a
21722 multiplication instruction) and no other dependency on that result. */
21724 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
21726 rtx mul
= PATTERN (producer
);
21727 rtx mac
= PATTERN (consumer
);
21729 rtx mac_op0
, mac_op1
, mac_acc
;
21731 if (GET_CODE (mul
) == COND_EXEC
)
21732 mul
= COND_EXEC_CODE (mul
);
21733 if (GET_CODE (mac
) == COND_EXEC
)
21734 mac
= COND_EXEC_CODE (mac
);
21736 /* Check that mul is of the form (set (...) (mult ...))
21737 and mla is of the form (set (...) (plus (mult ...) (...))). */
21738 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
21739 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
21740 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
21743 mul_result
= XEXP (mul
, 0);
21744 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
21745 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
21746 mac_acc
= XEXP (XEXP (mac
, 1), 1);
21748 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
21749 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
21750 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
21754 /* The EABI says test the least significant bit of a guard variable. */
21757 arm_cxx_guard_mask_bit (void)
21759 return TARGET_AAPCS_BASED
;
21763 /* The EABI specifies that all array cookies are 8 bytes long. */
21766 arm_get_cookie_size (tree type
)
21770 if (!TARGET_AAPCS_BASED
)
21771 return default_cxx_get_cookie_size (type
);
21773 size
= build_int_cst (sizetype
, 8);
21778 /* The EABI says that array cookies should also contain the element size. */
21781 arm_cookie_has_size (void)
21783 return TARGET_AAPCS_BASED
;
21787 /* The EABI says constructors and destructors should return a pointer to
21788 the object constructed/destroyed. */
21791 arm_cxx_cdtor_returns_this (void)
21793 return TARGET_AAPCS_BASED
;
21796 /* The EABI says that an inline function may never be the key
21800 arm_cxx_key_method_may_be_inline (void)
21802 return !TARGET_AAPCS_BASED
;
21806 arm_cxx_determine_class_data_visibility (tree decl
)
21808 if (!TARGET_AAPCS_BASED
21809 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
21812 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
21813 is exported. However, on systems without dynamic vague linkage,
21814 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
21815 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
21816 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
21818 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
21819 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
21823 arm_cxx_class_data_always_comdat (void)
21825 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
21826 vague linkage if the class has no key function. */
21827 return !TARGET_AAPCS_BASED
;
21831 /* The EABI says __aeabi_atexit should be used to register static
21835 arm_cxx_use_aeabi_atexit (void)
21837 return TARGET_AAPCS_BASED
;
21842 arm_set_return_address (rtx source
, rtx scratch
)
21844 arm_stack_offsets
*offsets
;
21845 HOST_WIDE_INT delta
;
21847 unsigned long saved_regs
;
21849 offsets
= arm_get_frame_offsets ();
21850 saved_regs
= offsets
->saved_regs_mask
;
21852 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
21853 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
21856 if (frame_pointer_needed
)
21857 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
21860 /* LR will be the first saved register. */
21861 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
21866 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
21867 GEN_INT (delta
& ~4095)));
21872 addr
= stack_pointer_rtx
;
21874 addr
= plus_constant (addr
, delta
);
21876 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
21882 thumb_set_return_address (rtx source
, rtx scratch
)
21884 arm_stack_offsets
*offsets
;
21885 HOST_WIDE_INT delta
;
21886 HOST_WIDE_INT limit
;
21889 unsigned long mask
;
21893 offsets
= arm_get_frame_offsets ();
21894 mask
= offsets
->saved_regs_mask
;
21895 if (mask
& (1 << LR_REGNUM
))
21898 /* Find the saved regs. */
21899 if (frame_pointer_needed
)
21901 delta
= offsets
->soft_frame
- offsets
->saved_args
;
21902 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
21908 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
21911 /* Allow for the stack frame. */
21912 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
21914 /* The link register is always the first saved register. */
21917 /* Construct the address. */
21918 addr
= gen_rtx_REG (SImode
, reg
);
21921 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
21922 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
21926 addr
= plus_constant (addr
, delta
);
21928 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
21931 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
21934 /* Implements target hook vector_mode_supported_p. */
21936 arm_vector_mode_supported_p (enum machine_mode mode
)
21938 /* Neon also supports V2SImode, etc. listed in the clause below. */
21939 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
21940 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
21943 if ((TARGET_NEON
|| TARGET_IWMMXT
)
21944 && ((mode
== V2SImode
)
21945 || (mode
== V4HImode
)
21946 || (mode
== V8QImode
)))
21952 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
21953 registers when autovectorizing for Neon, at least until multiple vector
21954 widths are supported properly by the middle-end. */
21956 static unsigned int
21957 arm_units_per_simd_word (enum machine_mode mode ATTRIBUTE_UNUSED
)
21959 return (TARGET_NEON
21960 ? (TARGET_NEON_VECTORIZE_QUAD
? 16 : 8) : UNITS_PER_WORD
);
21963 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
21965 We need to define this for LO_REGS on thumb. Otherwise we can end up
21966 using r0-r4 for function arguments, r7 for the stack frame and don't
21967 have enough left over to do doubleword arithmetic. */
21970 arm_class_likely_spilled_p (reg_class_t rclass
)
21972 if ((TARGET_THUMB
&& rclass
== LO_REGS
)
21973 || rclass
== CC_REG
)
21979 /* Implements target hook small_register_classes_for_mode_p. */
21981 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
21983 return TARGET_THUMB1
;
21986 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
21987 ARM insns and therefore guarantee that the shift count is modulo 256.
21988 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
21989 guarantee no particular behavior for out-of-range counts. */
21991 static unsigned HOST_WIDE_INT
21992 arm_shift_truncation_mask (enum machine_mode mode
)
21994 return mode
== SImode
? 255 : 0;
21998 /* Map internal gcc register numbers to DWARF2 register numbers. */
22001 arm_dbx_register_number (unsigned int regno
)
22006 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22007 compatibility. The EABI defines them as registers 96-103. */
22008 if (IS_FPA_REGNUM (regno
))
22009 return (TARGET_AAPCS_BASED
? 96 : 16) + regno
- FIRST_FPA_REGNUM
;
22011 if (IS_VFP_REGNUM (regno
))
22013 /* See comment in arm_dwarf_register_span. */
22014 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22015 return 64 + regno
- FIRST_VFP_REGNUM
;
22017 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
22020 if (IS_IWMMXT_GR_REGNUM (regno
))
22021 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
22023 if (IS_IWMMXT_REGNUM (regno
))
22024 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
22026 gcc_unreachable ();
22029 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22030 GCC models tham as 64 32-bit registers, so we need to describe this to
22031 the DWARF generation code. Other registers can use the default. */
22033 arm_dwarf_register_span (rtx rtl
)
22040 regno
= REGNO (rtl
);
22041 if (!IS_VFP_REGNUM (regno
))
22044 /* XXX FIXME: The EABI defines two VFP register ranges:
22045 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22047 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22048 corresponding D register. Until GDB supports this, we shall use the
22049 legacy encodings. We also use these encodings for D0-D15 for
22050 compatibility with older debuggers. */
22051 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22054 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
22055 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
22056 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
22057 for (i
= 0; i
< nregs
; i
++)
22058 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
22063 #ifdef TARGET_UNWIND_INFO
22064 /* Emit unwind directives for a store-multiple instruction or stack pointer
22065 push during alignment.
22066 These should only ever be generated by the function prologue code, so
22067 expect them to have a particular form. */
22070 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
22073 HOST_WIDE_INT offset
;
22074 HOST_WIDE_INT nregs
;
22080 e
= XVECEXP (p
, 0, 0);
22081 if (GET_CODE (e
) != SET
)
22084 /* First insn will adjust the stack pointer. */
22085 if (GET_CODE (e
) != SET
22086 || GET_CODE (XEXP (e
, 0)) != REG
22087 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22088 || GET_CODE (XEXP (e
, 1)) != PLUS
)
22091 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
22092 nregs
= XVECLEN (p
, 0) - 1;
22094 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
22097 /* The function prologue may also push pc, but not annotate it as it is
22098 never restored. We turn this into a stack pointer adjustment. */
22099 if (nregs
* 4 == offset
- 4)
22101 fprintf (asm_out_file
, "\t.pad #4\n");
22105 fprintf (asm_out_file
, "\t.save {");
22107 else if (IS_VFP_REGNUM (reg
))
22110 fprintf (asm_out_file
, "\t.vsave {");
22112 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
22114 /* FPA registers are done differently. */
22115 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
22119 /* Unknown register type. */
22122 /* If the stack increment doesn't match the size of the saved registers,
22123 something has gone horribly wrong. */
22124 if (offset
!= nregs
* reg_size
)
22129 /* The remaining insns will describe the stores. */
22130 for (i
= 1; i
<= nregs
; i
++)
22132 /* Expect (set (mem <addr>) (reg)).
22133 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22134 e
= XVECEXP (p
, 0, i
);
22135 if (GET_CODE (e
) != SET
22136 || GET_CODE (XEXP (e
, 0)) != MEM
22137 || GET_CODE (XEXP (e
, 1)) != REG
)
22140 reg
= REGNO (XEXP (e
, 1));
22145 fprintf (asm_out_file
, ", ");
22146 /* We can't use %r for vfp because we need to use the
22147 double precision register names. */
22148 if (IS_VFP_REGNUM (reg
))
22149 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
22151 asm_fprintf (asm_out_file
, "%r", reg
);
22153 #ifdef ENABLE_CHECKING
22154 /* Check that the addresses are consecutive. */
22155 e
= XEXP (XEXP (e
, 0), 0);
22156 if (GET_CODE (e
) == PLUS
)
22158 offset
+= reg_size
;
22159 if (GET_CODE (XEXP (e
, 0)) != REG
22160 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22161 || GET_CODE (XEXP (e
, 1)) != CONST_INT
22162 || offset
!= INTVAL (XEXP (e
, 1)))
22166 || GET_CODE (e
) != REG
22167 || REGNO (e
) != SP_REGNUM
)
22171 fprintf (asm_out_file
, "}\n");
22174 /* Emit unwind directives for a SET. */
22177 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
22185 switch (GET_CODE (e0
))
22188 /* Pushing a single register. */
22189 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
22190 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
22191 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
22194 asm_fprintf (asm_out_file
, "\t.save ");
22195 if (IS_VFP_REGNUM (REGNO (e1
)))
22196 asm_fprintf(asm_out_file
, "{d%d}\n",
22197 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
22199 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
22203 if (REGNO (e0
) == SP_REGNUM
)
22205 /* A stack increment. */
22206 if (GET_CODE (e1
) != PLUS
22207 || GET_CODE (XEXP (e1
, 0)) != REG
22208 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
22209 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
22212 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
22213 -INTVAL (XEXP (e1
, 1)));
22215 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
22217 HOST_WIDE_INT offset
;
22219 if (GET_CODE (e1
) == PLUS
)
22221 if (GET_CODE (XEXP (e1
, 0)) != REG
22222 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
22224 reg
= REGNO (XEXP (e1
, 0));
22225 offset
= INTVAL (XEXP (e1
, 1));
22226 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
22227 HARD_FRAME_POINTER_REGNUM
, reg
,
22230 else if (GET_CODE (e1
) == REG
)
22233 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
22234 HARD_FRAME_POINTER_REGNUM
, reg
);
22239 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
22241 /* Move from sp to reg. */
22242 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
22244 else if (GET_CODE (e1
) == PLUS
22245 && GET_CODE (XEXP (e1
, 0)) == REG
22246 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
22247 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
22249 /* Set reg to offset from sp. */
22250 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
22251 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
22253 else if (GET_CODE (e1
) == UNSPEC
&& XINT (e1
, 1) == UNSPEC_STACK_ALIGN
)
22255 /* Stack pointer save before alignment. */
22257 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
22270 /* Emit unwind directives for the given insn. */
22273 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
22277 if (!ARM_EABI_UNWIND_TABLES
)
22280 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
22281 && (TREE_NOTHROW (current_function_decl
)
22282 || crtl
->all_throwers_are_sibcalls
))
22285 if (GET_CODE (insn
) == NOTE
|| !RTX_FRAME_RELATED_P (insn
))
22288 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
22290 pat
= XEXP (pat
, 0);
22292 pat
= PATTERN (insn
);
22294 switch (GET_CODE (pat
))
22297 arm_unwind_emit_set (asm_out_file
, pat
);
22301 /* Store multiple. */
22302 arm_unwind_emit_sequence (asm_out_file
, pat
);
22311 /* Output a reference from a function exception table to the type_info
22312 object X. The EABI specifies that the symbol should be relocated by
22313 an R_ARM_TARGET2 relocation. */
22316 arm_output_ttype (rtx x
)
22318 fputs ("\t.word\t", asm_out_file
);
22319 output_addr_const (asm_out_file
, x
);
22320 /* Use special relocations for symbol references. */
22321 if (GET_CODE (x
) != CONST_INT
)
22322 fputs ("(TARGET2)", asm_out_file
);
22323 fputc ('\n', asm_out_file
);
22328 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22331 arm_asm_emit_except_personality (rtx personality
)
22333 fputs ("\t.personality\t", asm_out_file
);
22334 output_addr_const (asm_out_file
, personality
);
22335 fputc ('\n', asm_out_file
);
22338 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22341 arm_asm_init_sections (void)
22343 exception_section
= get_unnamed_section (0, output_section_asm_op
,
22346 #endif /* TARGET_UNWIND_INFO */
22349 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
22350 stack alignment. */
22353 arm_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
22355 rtx unspec
= SET_SRC (pattern
);
22356 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
22360 case UNSPEC_STACK_ALIGN
:
22361 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
22362 put anything on the stack, so hopefully it won't matter.
22363 CFA = SP will be correct after alignment. */
22364 dwarf2out_reg_save_reg (label
, stack_pointer_rtx
,
22365 SET_DEST (pattern
));
22368 gcc_unreachable ();
22373 /* Output unwind directives for the start/end of a function. */
22376 arm_output_fn_unwind (FILE * f
, bool prologue
)
22378 if (!ARM_EABI_UNWIND_TABLES
)
22382 fputs ("\t.fnstart\n", f
);
22385 /* If this function will never be unwound, then mark it as such.
22386 The came condition is used in arm_unwind_emit to suppress
22387 the frame annotations. */
22388 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
22389 && (TREE_NOTHROW (current_function_decl
)
22390 || crtl
->all_throwers_are_sibcalls
))
22391 fputs("\t.cantunwind\n", f
);
22393 fputs ("\t.fnend\n", f
);
22398 arm_emit_tls_decoration (FILE *fp
, rtx x
)
22400 enum tls_reloc reloc
;
22403 val
= XVECEXP (x
, 0, 0);
22404 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
22406 output_addr_const (fp
, val
);
22411 fputs ("(tlsgd)", fp
);
22414 fputs ("(tlsldm)", fp
);
22417 fputs ("(tlsldo)", fp
);
22420 fputs ("(gottpoff)", fp
);
22423 fputs ("(tpoff)", fp
);
22426 gcc_unreachable ();
22434 fputs (" + (. - ", fp
);
22435 output_addr_const (fp
, XVECEXP (x
, 0, 2));
22437 output_addr_const (fp
, XVECEXP (x
, 0, 3));
22447 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
22450 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
22452 gcc_assert (size
== 4);
22453 fputs ("\t.word\t", file
);
22454 output_addr_const (file
, x
);
22455 fputs ("(tlsldo)", file
);
22458 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22461 arm_output_addr_const_extra (FILE *fp
, rtx x
)
22463 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
22464 return arm_emit_tls_decoration (fp
, x
);
22465 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
22468 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
22470 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
22471 assemble_name_raw (fp
, label
);
22475 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
22477 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
22481 output_addr_const (fp
, XVECEXP (x
, 0, 0));
22485 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
22487 output_addr_const (fp
, XVECEXP (x
, 0, 0));
22491 output_addr_const (fp
, XVECEXP (x
, 0, 1));
22495 else if (GET_CODE (x
) == CONST_VECTOR
)
22496 return arm_emit_vector_const (fp
, x
);
22501 /* Output assembly for a shift instruction.
22502 SET_FLAGS determines how the instruction modifies the condition codes.
22503 0 - Do not set condition codes.
22504 1 - Set condition codes.
22505 2 - Use smallest instruction. */
22507 arm_output_shift(rtx
* operands
, int set_flags
)
22510 static const char flag_chars
[3] = {'?', '.', '!'};
22515 c
= flag_chars
[set_flags
];
22516 if (TARGET_UNIFIED_ASM
)
22518 shift
= shift_op(operands
[3], &val
);
22522 operands
[2] = GEN_INT(val
);
22523 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
22526 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
22529 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
22530 output_asm_insn (pattern
, operands
);
22534 /* Output a Thumb-1 casesi dispatch sequence. */
22536 thumb1_output_casesi (rtx
*operands
)
22538 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
22540 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
22542 switch (GET_MODE(diff_vec
))
22545 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
22546 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
22548 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
22549 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
22551 return "bl\t%___gnu_thumb1_case_si";
22553 gcc_unreachable ();
22557 /* Output a Thumb-2 casesi instruction. */
22559 thumb2_output_casesi (rtx
*operands
)
22561 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
22563 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
22565 output_asm_insn ("cmp\t%0, %1", operands
);
22566 output_asm_insn ("bhi\t%l3", operands
);
22567 switch (GET_MODE(diff_vec
))
22570 return "tbb\t[%|pc, %0]";
22572 return "tbh\t[%|pc, %0, lsl #1]";
22576 output_asm_insn ("adr\t%4, %l2", operands
);
22577 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
22578 output_asm_insn ("add\t%4, %4, %5", operands
);
22583 output_asm_insn ("adr\t%4, %l2", operands
);
22584 return "ldr\t%|pc, [%4, %0, lsl #2]";
22587 gcc_unreachable ();
22591 /* Most ARM cores are single issue, but some newer ones can dual issue.
22592 The scheduler descriptions rely on this being correct. */
22594 arm_issue_rate (void)
22610 /* A table and a function to perform ARM-specific name mangling for
22611 NEON vector types in order to conform to the AAPCS (see "Procedure
22612 Call Standard for the ARM Architecture", Appendix A). To qualify
22613 for emission with the mangled names defined in that document, a
22614 vector type must not only be of the correct mode but also be
22615 composed of NEON vector element types (e.g. __builtin_neon_qi). */
22618 enum machine_mode mode
;
22619 const char *element_type_name
;
22620 const char *aapcs_name
;
22621 } arm_mangle_map_entry
;
22623 static arm_mangle_map_entry arm_mangle_map
[] = {
22624 /* 64-bit containerized types. */
22625 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
22626 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
22627 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
22628 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
22629 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
22630 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
22631 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
22632 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
22633 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
22634 /* 128-bit containerized types. */
22635 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
22636 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
22637 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
22638 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
22639 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
22640 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
22641 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
22642 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
22643 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
22644 { VOIDmode
, NULL
, NULL
}
22648 arm_mangle_type (const_tree type
)
22650 arm_mangle_map_entry
*pos
= arm_mangle_map
;
22652 /* The ARM ABI documents (10th October 2008) say that "__va_list"
22653 has to be managled as if it is in the "std" namespace. */
22654 if (TARGET_AAPCS_BASED
22655 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
22657 static bool warned
;
22658 if (!warned
&& warn_psabi
&& !in_system_header
)
22661 inform (input_location
,
22662 "the mangling of %<va_list%> has changed in GCC 4.4");
22664 return "St9__va_list";
22667 /* Half-precision float. */
22668 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
22671 if (TREE_CODE (type
) != VECTOR_TYPE
)
22674 /* Check the mode of the vector type, and the name of the vector
22675 element type, against the table. */
22676 while (pos
->mode
!= VOIDmode
)
22678 tree elt_type
= TREE_TYPE (type
);
22680 if (pos
->mode
== TYPE_MODE (type
)
22681 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
22682 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
22683 pos
->element_type_name
))
22684 return pos
->aapcs_name
;
22689 /* Use the default mangling for unrecognized (possibly user-defined)
22694 /* Order of allocation of core registers for Thumb: this allocation is
22695 written over the corresponding initial entries of the array
22696 initialized with REG_ALLOC_ORDER. We allocate all low registers
22697 first. Saving and restoring a low register is usually cheaper than
22698 using a call-clobbered high register. */
22700 static const int thumb_core_reg_alloc_order
[] =
22702 3, 2, 1, 0, 4, 5, 6, 7,
22703 14, 12, 8, 9, 10, 11, 13, 15
22706 /* Adjust register allocation order when compiling for Thumb. */
22709 arm_order_regs_for_local_alloc (void)
22711 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
22712 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
22714 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
22715 sizeof (thumb_core_reg_alloc_order
));
22718 /* Set default optimization options. */
22720 arm_option_optimization (int level
, int size ATTRIBUTE_UNUSED
)
22722 /* Enable section anchors by default at -O1 or higher.
22723 Use 2 to distinguish from an explicit -fsection-anchors
22724 given on the command line. */
22726 flag_section_anchors
= 2;
22729 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
22732 arm_frame_pointer_required (void)
22734 return (cfun
->has_nonlocal_label
22735 || SUBTARGET_FRAME_POINTER_REQUIRED
22736 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
22739 /* Only thumb1 can't support conditional execution, so return true if
22740 the target is not thumb1. */
22742 arm_have_conditional_execution (void)
22744 return !TARGET_THUMB1
;
22747 /* Legitimize a memory reference for sync primitive implemented using
22748 ldrex / strex. We currently force the form of the reference to be
22749 indirect without offset. We do not yet support the indirect offset
22750 addressing supported by some ARM targets for these
22753 arm_legitimize_sync_memory (rtx memory
)
22755 rtx addr
= force_reg (Pmode
, XEXP (memory
, 0));
22756 rtx legitimate_memory
= gen_rtx_MEM (GET_MODE (memory
), addr
);
22758 set_mem_alias_set (legitimate_memory
, ALIAS_SET_MEMORY_BARRIER
);
22759 MEM_VOLATILE_P (legitimate_memory
) = MEM_VOLATILE_P (memory
);
22760 return legitimate_memory
;
22763 /* An instruction emitter. */
22764 typedef void (* emit_f
) (int label
, const char *, rtx
*);
22766 /* An instruction emitter that emits via the conventional
22767 output_asm_insn. */
22769 arm_emit (int label ATTRIBUTE_UNUSED
, const char *pattern
, rtx
*operands
)
22771 output_asm_insn (pattern
, operands
);
22774 /* Count the number of emitted synchronization instructions. */
22775 static unsigned arm_insn_count
;
22777 /* An emitter that counts emitted instructions but does not actually
22778 emit instruction into the the instruction stream. */
22780 arm_count (int label
,
22781 const char *pattern ATTRIBUTE_UNUSED
,
22782 rtx
*operands ATTRIBUTE_UNUSED
)
22788 /* Construct a pattern using conventional output formatting and feed
22789 it to output_asm_insn. Provides a mechanism to construct the
22790 output pattern on the fly. Note the hard limit on the pattern
22793 arm_output_asm_insn (emit_f emit
, int label
, rtx
*operands
,
22794 const char *pattern
, ...)
22799 va_start (ap
, pattern
);
22800 vsprintf (buffer
, pattern
, ap
);
22802 emit (label
, buffer
, operands
);
22805 /* Emit the memory barrier instruction, if any, provided by this
22806 target to a specified emitter. */
22808 arm_process_output_memory_barrier (emit_f emit
, rtx
*operands
)
22810 if (TARGET_HAVE_DMB
)
22812 /* Note we issue a system level barrier. We should consider
22813 issuing a inner shareabilty zone barrier here instead, ie.
22815 emit (0, "dmb\tsy", operands
);
22819 if (TARGET_HAVE_DMB_MCR
)
22821 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands
);
22825 gcc_unreachable ();
22828 /* Emit the memory barrier instruction, if any, provided by this
22831 arm_output_memory_barrier (rtx
*operands
)
22833 arm_process_output_memory_barrier (arm_emit
, operands
);
22837 /* Helper to figure out the instruction suffix required on ldrex/strex
22838 for operations on an object of the specified mode. */
22839 static const char *
22840 arm_ldrex_suffix (enum machine_mode mode
)
22844 case QImode
: return "b";
22845 case HImode
: return "h";
22846 case SImode
: return "";
22847 case DImode
: return "d";
22849 gcc_unreachable ();
22854 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
22857 arm_output_ldrex (emit_f emit
,
22858 enum machine_mode mode
,
22862 const char *suffix
= arm_ldrex_suffix (mode
);
22865 operands
[0] = target
;
22866 operands
[1] = memory
;
22867 arm_output_asm_insn (emit
, 0, operands
, "ldrex%s\t%%0, %%C1", suffix
);
22870 /* Emit a strex{b,h,d, } instruction appropriate for the specified
22873 arm_output_strex (emit_f emit
,
22874 enum machine_mode mode
,
22880 const char *suffix
= arm_ldrex_suffix (mode
);
22883 operands
[0] = result
;
22884 operands
[1] = value
;
22885 operands
[2] = memory
;
22886 arm_output_asm_insn (emit
, 0, operands
, "strex%s%s\t%%0, %%1, %%C2", suffix
,
22890 /* Helper to emit a two operand instruction. */
22892 arm_output_op2 (emit_f emit
, const char *mnemonic
, rtx d
, rtx s
)
22898 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1", mnemonic
);
22901 /* Helper to emit a three operand instruction. */
22903 arm_output_op3 (emit_f emit
, const char *mnemonic
, rtx d
, rtx a
, rtx b
)
22910 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1, %%2", mnemonic
);
22913 /* Emit a load store exclusive synchronization loop.
22917 if old_value != required_value
22919 t1 = sync_op (old_value, new_value)
22920 [mem] = t1, t2 = [0|1]
22924 t1 == t2 is not permitted
22925 t1 == old_value is permitted
22929 RTX register or const_int representing the required old_value for
22930 the modify to continue, if NULL no comparsion is performed. */
22932 arm_output_sync_loop (emit_f emit
,
22933 enum machine_mode mode
,
22936 rtx required_value
,
22940 enum attr_sync_op sync_op
,
22941 int early_barrier_required
)
22945 gcc_assert (t1
!= t2
);
22947 if (early_barrier_required
)
22948 arm_process_output_memory_barrier (emit
, NULL
);
22950 arm_output_asm_insn (emit
, 1, operands
, "%sLSYT%%=:", LOCAL_LABEL_PREFIX
);
22952 arm_output_ldrex (emit
, mode
, old_value
, memory
);
22954 if (required_value
)
22958 operands
[0] = old_value
;
22959 operands
[1] = required_value
;
22960 arm_output_asm_insn (emit
, 0, operands
, "cmp\t%%0, %%1");
22961 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX
);
22967 arm_output_op3 (emit
, "add", t1
, old_value
, new_value
);
22971 arm_output_op3 (emit
, "sub", t1
, old_value
, new_value
);
22975 arm_output_op3 (emit
, "orr", t1
, old_value
, new_value
);
22979 arm_output_op3 (emit
, "eor", t1
, old_value
, new_value
);
22983 arm_output_op3 (emit
,"and", t1
, old_value
, new_value
);
22987 arm_output_op3 (emit
, "and", t1
, old_value
, new_value
);
22988 arm_output_op2 (emit
, "mvn", t1
, t1
);
22996 arm_output_strex (emit
, mode
, "", t2
, t1
, memory
);
22998 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
22999 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=", LOCAL_LABEL_PREFIX
);
23001 arm_process_output_memory_barrier (emit
, NULL
);
23002 arm_output_asm_insn (emit
, 1, operands
, "%sLSYB%%=:", LOCAL_LABEL_PREFIX
);
23006 arm_get_sync_operand (rtx
*operands
, int index
, rtx default_value
)
23009 default_value
= operands
[index
- 1];
23011 return default_value
;
23014 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23015 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23017 /* Extract the operands for a synchroniztion instruction from the
23018 instructions attributes and emit the instruction. */
23020 arm_process_output_sync_insn (emit_f emit
, rtx insn
, rtx
*operands
)
23022 rtx result
, memory
, required_value
, new_value
, t1
, t2
;
23024 enum machine_mode mode
;
23025 enum attr_sync_op sync_op
;
23027 result
= FETCH_SYNC_OPERAND(result
, 0);
23028 memory
= FETCH_SYNC_OPERAND(memory
, 0);
23029 required_value
= FETCH_SYNC_OPERAND(required_value
, 0);
23030 new_value
= FETCH_SYNC_OPERAND(new_value
, 0);
23031 t1
= FETCH_SYNC_OPERAND(t1
, 0);
23032 t2
= FETCH_SYNC_OPERAND(t2
, 0);
23034 get_attr_sync_release_barrier (insn
) == SYNC_RELEASE_BARRIER_YES
;
23035 sync_op
= get_attr_sync_op (insn
);
23036 mode
= GET_MODE (memory
);
23038 arm_output_sync_loop (emit
, mode
, result
, memory
, required_value
,
23039 new_value
, t1
, t2
, sync_op
, early_barrier
);
23042 /* Emit a synchronization instruction loop. */
23044 arm_output_sync_insn (rtx insn
, rtx
*operands
)
23046 arm_process_output_sync_insn (arm_emit
, insn
, operands
);
23050 /* Count the number of machine instruction that will be emitted for a
23051 synchronization instruction. Note that the emitter used does not
23052 emit instructions, it just counts instructions being carefull not
23053 to count labels. */
23055 arm_sync_loop_insns (rtx insn
, rtx
*operands
)
23057 arm_insn_count
= 0;
23058 arm_process_output_sync_insn (arm_count
, insn
, operands
);
23059 return arm_insn_count
;
23062 /* Helper to call a target sync instruction generator, dealing with
23063 the variation in operands required by the different generators. */
23065 arm_call_generator (struct arm_sync_generator
*generator
, rtx old_value
,
23066 rtx memory
, rtx required_value
, rtx new_value
)
23068 switch (generator
->op
)
23070 case arm_sync_generator_omn
:
23071 gcc_assert (! required_value
);
23072 return generator
->u
.omn (old_value
, memory
, new_value
);
23074 case arm_sync_generator_omrn
:
23075 gcc_assert (required_value
);
23076 return generator
->u
.omrn (old_value
, memory
, required_value
, new_value
);
23082 /* Expand a synchronization loop. The synchronization loop is expanded
23083 as an opaque block of instructions in order to ensure that we do
23084 not subsequently get extraneous memory accesses inserted within the
23085 critical region. The exclusive access property of ldrex/strex is
23086 only guaranteed in there are no intervening memory accesses. */
23088 arm_expand_sync (enum machine_mode mode
,
23089 struct arm_sync_generator
*generator
,
23090 rtx target
, rtx memory
, rtx required_value
, rtx new_value
)
23092 if (target
== NULL
)
23093 target
= gen_reg_rtx (mode
);
23095 memory
= arm_legitimize_sync_memory (memory
);
23096 if (mode
!= SImode
)
23098 rtx load_temp
= gen_reg_rtx (SImode
);
23100 if (required_value
)
23101 required_value
= convert_modes (SImode
, mode
, required_value
, true);
23103 new_value
= convert_modes (SImode
, mode
, new_value
, true);
23104 emit_insn (arm_call_generator (generator
, load_temp
, memory
,
23105 required_value
, new_value
));
23106 emit_move_insn (target
, gen_lowpart (mode
, load_temp
));
23110 emit_insn (arm_call_generator (generator
, target
, memory
, required_value
,
23115 #include "gt-arm.h"