/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
27 #include "coretypes.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
43 #include "diagnostic-core.h"
48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h"
52 #include "target-def.h"
54 #include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* Hook for emitting language-specific object attributes; a file-scope
   function pointer, zero-initialized (NULL) until a front end sets it.  */
void (*arm_lang_output_object_attributes_hook)(void);
67 /* Forward function declarations. */
68 static bool arm_needs_doubleword_align (enum machine_mode
, const_tree
);
69 static int arm_compute_static_chain_stack_bytes (void);
70 static arm_stack_offsets
*arm_get_frame_offsets (void);
71 static void arm_add_gc_roots (void);
72 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
73 HOST_WIDE_INT
, rtx
, rtx
, int, int);
74 static unsigned bit_count (unsigned long);
75 static int arm_address_register_rtx_p (rtx
, int);
76 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
77 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
78 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
79 static rtx
arm_legitimize_address (rtx
, rtx
, enum machine_mode
);
80 static rtx
thumb_legitimize_address (rtx
, rtx
, enum machine_mode
);
81 inline static int thumb1_index_register_rtx_p (rtx
, int);
82 static bool arm_legitimate_address_p (enum machine_mode
, rtx
, bool);
83 static int thumb_far_jump_used_p (void);
84 static bool thumb_force_lr_save (void);
85 static int const_ok_for_op (HOST_WIDE_INT
, enum rtx_code
);
86 static rtx
emit_sfm (int, int);
87 static unsigned arm_size_return_regs (void);
88 static bool arm_assemble_integer (rtx
, unsigned int, int);
89 static void arm_print_operand (FILE *, rtx
, int);
90 static void arm_print_operand_address (FILE *, rtx
);
91 static bool arm_print_operand_punct_valid_p (unsigned char code
);
92 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
93 static arm_cc
get_arm_condition_code (rtx
);
94 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
95 static rtx
is_jump_table (rtx
);
96 static const char *output_multi_immediate (rtx
*, const char *, const char *,
98 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
99 static struct machine_function
*arm_init_machine_status (void);
100 static void thumb_exit (FILE *, int);
101 static rtx
is_jump_table (rtx
);
102 static HOST_WIDE_INT
get_jump_table_size (rtx
);
103 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
104 static Mnode
*add_minipool_forward_ref (Mfix
*);
105 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
106 static Mnode
*add_minipool_backward_ref (Mfix
*);
107 static void assign_minipool_offsets (Mfix
*);
108 static void arm_print_value (FILE *, rtx
);
109 static void dump_minipool (rtx
);
110 static int arm_barrier_cost (rtx
);
111 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
112 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
113 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
115 static void arm_reorg (void);
116 static bool note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
117 static unsigned long arm_compute_save_reg0_reg12_mask (void);
118 static unsigned long arm_compute_save_reg_mask (void);
119 static unsigned long arm_isr_value (tree
);
120 static unsigned long arm_compute_func_type (void);
121 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
122 static tree
arm_handle_pcs_attribute (tree
*, tree
, tree
, int, bool *);
123 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
124 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
125 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
127 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
128 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
129 static int arm_comp_type_attributes (const_tree
, const_tree
);
130 static void arm_set_default_type_attributes (tree
);
131 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
132 static int count_insns_for_constant (HOST_WIDE_INT
, int);
133 static int arm_get_strip_length (int);
134 static bool arm_function_ok_for_sibcall (tree
, tree
);
135 static enum machine_mode
arm_promote_function_mode (const_tree
,
136 enum machine_mode
, int *,
138 static bool arm_return_in_memory (const_tree
, const_tree
);
139 static rtx
arm_function_value (const_tree
, const_tree
, bool);
140 static rtx
arm_libcall_value (enum machine_mode
, const_rtx
);
142 static void arm_internal_label (FILE *, const char *, unsigned long);
143 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
145 static bool arm_have_conditional_execution (void);
146 static bool arm_cannot_force_const_mem (enum machine_mode
, rtx
);
147 static bool arm_legitimate_constant_p (enum machine_mode
, rtx
);
148 static bool arm_rtx_costs_1 (rtx
, enum rtx_code
, int*, bool);
149 static bool arm_size_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *);
150 static bool arm_slowmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
151 static bool arm_fastmul_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
152 static bool arm_xscale_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
153 static bool arm_9e_rtx_costs (rtx
, enum rtx_code
, enum rtx_code
, int *, bool);
154 static bool arm_rtx_costs (rtx
, int, int, int *, bool);
155 static int arm_address_cost (rtx
, bool);
156 static bool arm_memory_load_p (rtx
);
157 static bool arm_cirrus_insn_p (rtx
);
158 static void cirrus_reorg (rtx
);
159 static void arm_init_builtins (void);
160 static void arm_init_iwmmxt_builtins (void);
161 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
162 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
163 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
164 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
165 static tree
arm_builtin_decl (unsigned, bool);
166 static void emit_constant_insn (rtx cond
, rtx pattern
);
167 static rtx
emit_set_insn (rtx
, rtx
);
168 static int arm_arg_partial_bytes (cumulative_args_t
, enum machine_mode
,
170 static rtx
arm_function_arg (cumulative_args_t
, enum machine_mode
,
172 static void arm_function_arg_advance (cumulative_args_t
, enum machine_mode
,
174 static unsigned int arm_function_arg_boundary (enum machine_mode
, const_tree
);
175 static rtx
aapcs_allocate_return_reg (enum machine_mode
, const_tree
,
177 static int aapcs_select_return_coproc (const_tree
, const_tree
);
179 #ifdef OBJECT_FORMAT_ELF
180 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
181 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
184 static void arm_encode_section_info (tree
, rtx
, int);
187 static void arm_file_end (void);
188 static void arm_file_start (void);
190 static void arm_setup_incoming_varargs (cumulative_args_t
, enum machine_mode
,
192 static bool arm_pass_by_reference (cumulative_args_t
,
193 enum machine_mode
, const_tree
, bool);
194 static bool arm_promote_prototypes (const_tree
);
195 static bool arm_default_short_enums (void);
196 static bool arm_align_anon_bitfield (void);
197 static bool arm_return_in_msb (const_tree
);
198 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
199 static bool arm_return_in_memory (const_tree
, const_tree
);
201 static void arm_unwind_emit (FILE *, rtx
);
202 static bool arm_output_ttype (rtx
);
203 static void arm_asm_emit_except_personality (rtx
);
204 static void arm_asm_init_sections (void);
206 static rtx
arm_dwarf_register_span (rtx
);
208 static tree
arm_cxx_guard_type (void);
209 static bool arm_cxx_guard_mask_bit (void);
210 static tree
arm_get_cookie_size (tree
);
211 static bool arm_cookie_has_size (void);
212 static bool arm_cxx_cdtor_returns_this (void);
213 static bool arm_cxx_key_method_may_be_inline (void);
214 static void arm_cxx_determine_class_data_visibility (tree
);
215 static bool arm_cxx_class_data_always_comdat (void);
216 static bool arm_cxx_use_aeabi_atexit (void);
217 static void arm_init_libfuncs (void);
218 static tree
arm_build_builtin_va_list (void);
219 static void arm_expand_builtin_va_start (tree
, rtx
);
220 static tree
arm_gimplify_va_arg_expr (tree
, tree
, gimple_seq
*, gimple_seq
*);
221 static void arm_option_override (void);
222 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
223 static bool arm_cannot_copy_insn_p (rtx
);
224 static bool arm_tls_symbol_p (rtx x
);
225 static int arm_issue_rate (void);
226 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
227 static bool arm_output_addr_const_extra (FILE *, rtx
);
228 static bool arm_allocate_stack_slots_for_args (void);
229 static const char *arm_invalid_parameter_type (const_tree t
);
230 static const char *arm_invalid_return_type (const_tree t
);
231 static tree
arm_promoted_type (const_tree t
);
232 static tree
arm_convert_to_type (tree type
, tree expr
);
233 static bool arm_scalar_mode_supported_p (enum machine_mode
);
234 static bool arm_frame_pointer_required (void);
235 static bool arm_can_eliminate (const int, const int);
236 static void arm_asm_trampoline_template (FILE *);
237 static void arm_trampoline_init (rtx
, tree
, rtx
);
238 static rtx
arm_trampoline_adjust_address (rtx
);
239 static rtx
arm_pic_static_addr (rtx orig
, rtx reg
);
240 static bool cortex_a9_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
241 static bool xscale_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
242 static bool fa726te_sched_adjust_cost (rtx
, rtx
, rtx
, int *);
243 static bool arm_array_mode_supported_p (enum machine_mode
,
244 unsigned HOST_WIDE_INT
);
245 static enum machine_mode
arm_preferred_simd_mode (enum machine_mode
);
246 static bool arm_class_likely_spilled_p (reg_class_t
);
247 static bool arm_vector_alignment_reachable (const_tree type
, bool is_packed
);
248 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode
,
252 static void arm_conditional_register_usage (void);
253 static reg_class_t
arm_preferred_rename_class (reg_class_t rclass
);
254 static unsigned int arm_autovectorize_vector_sizes (void);
255 static int arm_default_branch_cost (bool, bool);
256 static int arm_cortex_a5_branch_cost (bool, bool);
259 /* Table of machine attributes. */
260 static const struct attribute_spec arm_attribute_table
[] =
262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
263 affects_type_identity } */
264 /* Function calls made to this symbol must be done indirectly, because
265 it may lie outside of the 26 bit addressing range of a normal function
267 { "long_call", 0, 0, false, true, true, NULL
, false },
268 /* Whereas these functions are always known to reside within the 26 bit
270 { "short_call", 0, 0, false, true, true, NULL
, false },
271 /* Specify the procedure call conventions for a function. */
272 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute
,
274 /* Interrupt Service Routines have special prologue and epilogue requirements. */
275 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
,
277 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
,
279 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
282 /* ARM/PE has three new attributes:
284 dllexport - for exporting a function/variable that will live in a dll
285 dllimport - for importing a function/variable from a dll
287 Microsoft allows multiple declspecs in one __declspec, separating
288 them with spaces. We do NOT support this. Instead, use __declspec
291 { "dllimport", 0, 0, true, false, false, NULL
, false },
292 { "dllexport", 0, 0, true, false, false, NULL
, false },
293 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
,
295 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
296 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
, false },
297 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
, false },
298 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
,
301 { NULL
, 0, 0, false, false, false, NULL
, false }
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
/* NOTE(review): this #endif was missing in the damaged original.  */
#endif

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

/* NOTE(review): the #ifdef ARM_PE / #else / #endif wrapper was missing in
   the damaged original, leaving TARGET_ENCODE_SECTION_INFO defined twice;
   restored per upstream GCC.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
370 #undef TARGET_FUNCTION_VALUE
371 #define TARGET_FUNCTION_VALUE arm_function_value
373 #undef TARGET_LIBCALL_VALUE
374 #define TARGET_LIBCALL_VALUE arm_libcall_value
376 #undef TARGET_ASM_OUTPUT_MI_THUNK
377 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
378 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
379 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
381 #undef TARGET_RTX_COSTS
382 #define TARGET_RTX_COSTS arm_rtx_costs
383 #undef TARGET_ADDRESS_COST
384 #define TARGET_ADDRESS_COST arm_address_cost
386 #undef TARGET_SHIFT_TRUNCATION_MASK
387 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
388 #undef TARGET_VECTOR_MODE_SUPPORTED_P
389 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
390 #undef TARGET_ARRAY_MODE_SUPPORTED_P
391 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
392 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
393 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
394 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
395 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
396 arm_autovectorize_vector_sizes
398 #undef TARGET_MACHINE_DEPENDENT_REORG
399 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
401 #undef TARGET_INIT_BUILTINS
402 #define TARGET_INIT_BUILTINS arm_init_builtins
403 #undef TARGET_EXPAND_BUILTIN
404 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
405 #undef TARGET_BUILTIN_DECL
406 #define TARGET_BUILTIN_DECL arm_builtin_decl
408 #undef TARGET_INIT_LIBFUNCS
409 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
411 #undef TARGET_PROMOTE_FUNCTION_MODE
412 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
413 #undef TARGET_PROMOTE_PROTOTYPES
414 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
415 #undef TARGET_PASS_BY_REFERENCE
416 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
417 #undef TARGET_ARG_PARTIAL_BYTES
418 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
419 #undef TARGET_FUNCTION_ARG
420 #define TARGET_FUNCTION_ARG arm_function_arg
421 #undef TARGET_FUNCTION_ARG_ADVANCE
422 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
423 #undef TARGET_FUNCTION_ARG_BOUNDARY
424 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
429 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
430 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
432 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
433 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
434 #undef TARGET_TRAMPOLINE_INIT
435 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
436 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
437 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
439 #undef TARGET_DEFAULT_SHORT_ENUMS
440 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
442 #undef TARGET_ALIGN_ANON_BITFIELD
443 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
445 #undef TARGET_NARROW_VOLATILE_BITFIELD
446 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
448 #undef TARGET_CXX_GUARD_TYPE
449 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
451 #undef TARGET_CXX_GUARD_MASK_BIT
452 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
454 #undef TARGET_CXX_GET_COOKIE_SIZE
455 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
457 #undef TARGET_CXX_COOKIE_HAS_SIZE
458 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
460 #undef TARGET_CXX_CDTOR_RETURNS_THIS
461 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
463 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
464 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
466 #undef TARGET_CXX_USE_AEABI_ATEXIT
467 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
469 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
470 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
471 arm_cxx_determine_class_data_visibility
473 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
474 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
476 #undef TARGET_RETURN_IN_MSB
477 #define TARGET_RETURN_IN_MSB arm_return_in_msb
479 #undef TARGET_RETURN_IN_MEMORY
480 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
482 #undef TARGET_MUST_PASS_IN_STACK
483 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
486 #undef TARGET_ASM_UNWIND_EMIT
487 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
489 /* EABI unwinding tables use a different format for the typeinfo tables. */
490 #undef TARGET_ASM_TTYPE
491 #define TARGET_ASM_TTYPE arm_output_ttype
493 #undef TARGET_ARM_EABI_UNWINDER
494 #define TARGET_ARM_EABI_UNWINDER true
496 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
497 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
499 #undef TARGET_ASM_INIT_SECTIONS
500 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
501 #endif /* ARM_UNWIND_INFO */
503 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
506 #undef TARGET_CANNOT_COPY_INSN_P
507 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
510 #undef TARGET_HAVE_TLS
511 #define TARGET_HAVE_TLS true
514 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
515 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
517 #undef TARGET_LEGITIMATE_CONSTANT_P
518 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
520 #undef TARGET_CANNOT_FORCE_CONST_MEM
521 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
523 #undef TARGET_MAX_ANCHOR_OFFSET
524 #define TARGET_MAX_ANCHOR_OFFSET 4095
526 /* The minimum is set such that the total size of the block
527 for a particular anchor is -4088 + 1 + 4095 bytes, which is
528 divisible by eight, ensuring natural spacing of anchors. */
529 #undef TARGET_MIN_ANCHOR_OFFSET
530 #define TARGET_MIN_ANCHOR_OFFSET -4088
532 #undef TARGET_SCHED_ISSUE_RATE
533 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
535 #undef TARGET_MANGLE_TYPE
536 #define TARGET_MANGLE_TYPE arm_mangle_type
538 #undef TARGET_BUILD_BUILTIN_VA_LIST
539 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
540 #undef TARGET_EXPAND_BUILTIN_VA_START
541 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
542 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
543 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
546 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
547 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
550 #undef TARGET_LEGITIMATE_ADDRESS_P
551 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
553 #undef TARGET_INVALID_PARAMETER_TYPE
554 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
556 #undef TARGET_INVALID_RETURN_TYPE
557 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
559 #undef TARGET_PROMOTED_TYPE
560 #define TARGET_PROMOTED_TYPE arm_promoted_type
562 #undef TARGET_CONVERT_TO_TYPE
563 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
565 #undef TARGET_SCALAR_MODE_SUPPORTED_P
566 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
568 #undef TARGET_FRAME_POINTER_REQUIRED
569 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
571 #undef TARGET_CAN_ELIMINATE
572 #define TARGET_CAN_ELIMINATE arm_can_eliminate
574 #undef TARGET_CONDITIONAL_REGISTER_USAGE
575 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
577 #undef TARGET_CLASS_LIKELY_SPILLED_P
578 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
580 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
581 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
582 arm_vector_alignment_reachable
584 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
585 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
586 arm_builtin_support_vector_misalignment
588 #undef TARGET_PREFERRED_RENAME_CLASS
589 #define TARGET_PREFERRED_RENAME_CLASS \
590 arm_preferred_rename_class
592 struct gcc_target targetm
= TARGET_INITIALIZER
;
594 /* Obstack for minipool constant handling. */
595 static struct obstack minipool_obstack
;
596 static char * minipool_startobj
;
598 /* The maximum number of insns skipped which
599 will be conditionalised if possible. */
600 static int max_insns_skipped
= 5;
602 extern FILE * asm_out_file
;
604 /* True if we are currently building a constant table. */
605 int making_const_table
;
607 /* The processor for which instructions should be scheduled. */
608 enum processor_type arm_tune
= arm_none
;
610 /* The current tuning set. */
611 const struct tune_params
*current_tune
;
613 /* Which floating point hardware to schedule for. */
616 /* Which floating popint hardware to use. */
617 const struct arm_fpu_desc
*arm_fpu_desc
;
619 /* Used for Thumb call_via trampolines. */
620 rtx thumb_call_via_label
[14];
621 static int thumb_call_reg_needed
;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
/* NOTE(review): the comment closers on FL_NOTM and FL_ARCH7EM were lost in
   the damaged original, so the unterminated comments swallowed the
   following #defines; restored per upstream GCC.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_THUMB_DIV  (1 << 18)       /* Hardware divide (Thumb mode).  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */
#define FL_ARM_DIV    (1 << 23)       /* Hardware divide (ARM mode).  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only effect tuning, not available instructions.
   NOTE(review): the `| FL_CO_PROC' continuation was lost in the damaged
   original; restored per upstream GCC.  */
#define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
			 | FL_CO_PROC)

#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_THUMB_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_THUMB_DIV)
#define FL_FOR_ARCH7EM  (FL_FOR_ARCH7M | FL_ARCH7EM)
682 /* The bits in this mask specify which
683 instructions we are allowed to generate. */
684 static unsigned long insn_flags
= 0;
686 /* The bits in this mask specify which instruction scheduling options should
688 static unsigned long tune_flags
= 0;
690 /* The following are used in the arm.md file as equivalents to bits
691 in the above two flag variables. */
693 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
696 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
699 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
702 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
705 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
708 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
711 /* Nonzero if this chip supports the ARM 6K extensions. */
714 /* Nonzero if this chip supports the ARM 7 extensions. */
717 /* Nonzero if instructions not present in the 'M' profile can be used. */
718 int arm_arch_notm
= 0;
720 /* Nonzero if instructions present in ARMv7E-M can be used. */
723 /* Nonzero if this chip can benefit from load scheduling. */
724 int arm_ld_sched
= 0;
726 /* Nonzero if this chip is a StrongARM. */
727 int arm_tune_strongarm
= 0;
729 /* Nonzero if this chip is a Cirrus variant. */
730 int arm_arch_cirrus
= 0;
732 /* Nonzero if this chip supports Intel Wireless MMX technology. */
733 int arm_arch_iwmmxt
= 0;
735 /* Nonzero if this chip is an XScale. */
736 int arm_arch_xscale
= 0;
738 /* Nonzero if tuning for XScale */
739 int arm_tune_xscale
= 0;
741 /* Nonzero if we want to tune for stores that access the write-buffer.
742 This typically means an ARM6 or ARM7 with MMU or MPU. */
743 int arm_tune_wbuf
= 0;
745 /* Nonzero if tuning for Cortex-A9. */
746 int arm_tune_cortex_a9
= 0;
748 /* Nonzero if generating Thumb instructions. */
751 /* Nonzero if generating Thumb-1 instructions. */
754 /* Nonzero if we should define __THUMB_INTERWORK__ in the
756 XXX This is a bit of a hack, it's intended to help work around
757 problems in GLD which doesn't understand that armv5t code is
758 interworking clean. */
759 int arm_cpp_interwork
= 0;
761 /* Nonzero if chip supports Thumb 2. */
764 /* Nonzero if chip supports integer division instruction. */
765 int arm_arch_arm_hwdiv
;
766 int arm_arch_thumb_hwdiv
;
768 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
769 we must report the mode of the memory reference from
770 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
771 enum machine_mode output_memory_reference_mode
;
773 /* The register number to be used for the PIC offset register. */
774 unsigned arm_pic_register
= INVALID_REGNUM
;
776 /* Set to 1 after arm_reorg has started. Reset to start at the start of
777 the next function. */
778 static int after_arm_reorg
= 0;
780 enum arm_pcs arm_pcs_default
;
782 /* For an explanation of these variables, see final_prescan_insn below. */
784 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
785 enum arm_cond_code arm_current_cc
;
788 int arm_target_label
;
789 /* The number of conditionally executed insns, including the current insn. */
790 int arm_condexec_count
= 0;
791 /* A bitmask specifying the patterns for the IT block.
792 Zero means do not output an IT block before this insn. */
793 int arm_condexec_mask
= 0;
794 /* The number of bits used in arm_condexec_mask. */
795 int arm_condexec_masklen
= 0;
/* The condition codes of the ARM, and the inverse function.
   NOTE(review): the array braces/terminators below were lost in the damaged
   original; restored.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
817 /* Initialization code. */
821 const char *const name
;
822 enum processor_type core
;
824 const unsigned long flags
;
825 const struct tune_params
*const tune
;
829 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
830 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
835 const struct tune_params arm_slowmul_tune
=
837 arm_slowmul_rtx_costs
,
839 3, /* Constant limit. */
840 5, /* Max cond insns. */
841 ARM_PREFETCH_NOT_BENEFICIAL
,
842 true, /* Prefer constant pool. */
843 arm_default_branch_cost
846 const struct tune_params arm_fastmul_tune
=
848 arm_fastmul_rtx_costs
,
850 1, /* Constant limit. */
851 5, /* Max cond insns. */
852 ARM_PREFETCH_NOT_BENEFICIAL
,
853 true, /* Prefer constant pool. */
854 arm_default_branch_cost
857 /* StrongARM has early execution of branches, so a sequence that is worth
858 skipping is shorter. Set max_insns_skipped to a lower value. */
860 const struct tune_params arm_strongarm_tune
=
862 arm_fastmul_rtx_costs
,
864 1, /* Constant limit. */
865 3, /* Max cond insns. */
866 ARM_PREFETCH_NOT_BENEFICIAL
,
867 true, /* Prefer constant pool. */
868 arm_default_branch_cost
871 const struct tune_params arm_xscale_tune
=
873 arm_xscale_rtx_costs
,
874 xscale_sched_adjust_cost
,
875 2, /* Constant limit. */
876 3, /* Max cond insns. */
877 ARM_PREFETCH_NOT_BENEFICIAL
,
878 true, /* Prefer constant pool. */
879 arm_default_branch_cost
882 const struct tune_params arm_9e_tune
=
886 1, /* Constant limit. */
887 5, /* Max cond insns. */
888 ARM_PREFETCH_NOT_BENEFICIAL
,
889 true, /* Prefer constant pool. */
890 arm_default_branch_cost
893 const struct tune_params arm_v6t2_tune
=
897 1, /* Constant limit. */
898 5, /* Max cond insns. */
899 ARM_PREFETCH_NOT_BENEFICIAL
,
900 false, /* Prefer constant pool. */
901 arm_default_branch_cost
904 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
905 const struct tune_params arm_cortex_tune
=
909 1, /* Constant limit. */
910 5, /* Max cond insns. */
911 ARM_PREFETCH_NOT_BENEFICIAL
,
912 false, /* Prefer constant pool. */
913 arm_default_branch_cost
916 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
917 less appealing. Set max_insns_skipped to a low value. */
919 const struct tune_params arm_cortex_a5_tune
=
923 1, /* Constant limit. */
924 1, /* Max cond insns. */
925 ARM_PREFETCH_NOT_BENEFICIAL
,
926 false, /* Prefer constant pool. */
927 arm_cortex_a5_branch_cost
930 const struct tune_params arm_cortex_a9_tune
=
933 cortex_a9_sched_adjust_cost
,
934 1, /* Constant limit. */
935 5, /* Max cond insns. */
936 ARM_PREFETCH_BENEFICIAL(4,32,32),
937 false, /* Prefer constant pool. */
938 arm_default_branch_cost
941 const struct tune_params arm_fa726te_tune
=
944 fa726te_sched_adjust_cost
,
945 1, /* Constant limit. */
946 5, /* Max cond insns. */
947 ARM_PREFETCH_NOT_BENEFICIAL
,
948 true, /* Prefer constant pool. */
949 arm_default_branch_cost
953 /* Not all of these give usefully different compilation alternatives,
954 but there is no simple way of generalizing them. */
955 static const struct processors all_cores
[] =
958 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
959 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
960 #include "arm-cores.def"
962 {NULL
, arm_none
, NULL
, 0, NULL
}
965 static const struct processors all_architectures
[] =
967 /* ARM Architectures */
968 /* We don't specify tuning costs here as it will be figured out
971 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
972 {NAME, CORE, #ARCH, FLAGS, NULL},
973 #include "arm-arches.def"
975 {NULL
, arm_none
, NULL
, 0 , NULL
}
979 /* These are populated as commandline arguments are processed, or NULL
981 static const struct processors
*arm_selected_arch
;
982 static const struct processors
*arm_selected_cpu
;
983 static const struct processors
*arm_selected_tune
;
985 /* The name of the preprocessor macro to define for this architecture. */
987 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
989 /* Available values for -mfpu=. */
991 static const struct arm_fpu_desc all_fpus
[] =
993 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16) \
994 { NAME, MODEL, REV, VFP_REGS, NEON, FP16 },
995 #include "arm-fpus.def"
1000 /* Supported TLS relocations. */
1008 TLS_DESCSEQ
/* GNU scheme */
1011 /* The maximum number of insns to be used when loading a constant. */
1013 arm_constant_limit (bool size_p
)
1015 return size_p
? 1 : current_tune
->constant_limit
;
1018 /* Emit an insn that's a simple single-set. Both the operands must be known
1021 emit_set_insn (rtx x
, rtx y
)
1023 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
1026 /* Return the number of bits set in VALUE. */
1028 bit_count (unsigned long value
)
1030 unsigned long count
= 0;
1035 value
&= value
- 1; /* Clear the least-significant set bit. */
1043 enum machine_mode mode
;
1045 } arm_fixed_mode_set
;
1047 /* A small helper for setting fixed-point library libfuncs. */
1050 arm_set_fixed_optab_libfunc (optab optable
, enum machine_mode mode
,
1051 const char *funcname
, const char *modename
,
1056 if (num_suffix
== 0)
1057 sprintf (buffer
, "__gnu_%s%s", funcname
, modename
);
1059 sprintf (buffer
, "__gnu_%s%s%d", funcname
, modename
, num_suffix
);
1061 set_optab_libfunc (optable
, mode
, buffer
);
1065 arm_set_fixed_conv_libfunc (convert_optab optable
, enum machine_mode to
,
1066 enum machine_mode from
, const char *funcname
,
1067 const char *toname
, const char *fromname
)
1070 char *maybe_suffix_2
= "";
1072 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
1073 if (ALL_FIXED_POINT_MODE_P (from
) && ALL_FIXED_POINT_MODE_P (to
)
1074 && UNSIGNED_FIXED_POINT_MODE_P (from
) == UNSIGNED_FIXED_POINT_MODE_P (to
)
1075 && ALL_FRACT_MODE_P (from
) == ALL_FRACT_MODE_P (to
))
1076 maybe_suffix_2
= "2";
1078 sprintf (buffer
, "__gnu_%s%s%s%s", funcname
, fromname
, toname
,
1081 set_conv_libfunc (optable
, to
, from
, buffer
);
1084 /* Set up library functions unique to ARM. */
1087 arm_init_libfuncs (void)
1089 /* There are no special library functions unless we are using the
1094 /* The functions below are described in Section 4 of the "Run-Time
1095 ABI for the ARM architecture", Version 1.0. */
1097 /* Double-precision floating-point arithmetic. Table 2. */
1098 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
1099 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
1100 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
1101 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
1102 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
1104 /* Double-precision comparisons. Table 3. */
1105 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
1106 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
1107 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
1108 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
1109 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
1110 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
1111 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
1113 /* Single-precision floating-point arithmetic. Table 4. */
1114 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
1115 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
1116 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
1117 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
1118 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
1120 /* Single-precision comparisons. Table 5. */
1121 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
1122 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
1123 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
1124 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
1125 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
1126 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
1127 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
1129 /* Floating-point to integer conversions. Table 6. */
1130 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
1131 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
1132 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
1133 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
1134 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
1135 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
1136 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
1137 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
1139 /* Conversions between floating types. Table 7. */
1140 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
1141 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
1143 /* Integer to floating-point conversions. Table 8. */
1144 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
1145 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
1146 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
1147 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
1148 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
1149 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
1150 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
1151 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
1153 /* Long long. Table 9. */
1154 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
1155 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
1156 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
1157 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
1158 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
1159 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
1160 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
1161 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
1163 /* Integer (32/32->32) division. \S 4.3.1. */
1164 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
1165 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
1167 /* The divmod functions are designed so that they can be used for
1168 plain division, even though they return both the quotient and the
1169 remainder. The quotient is returned in the usual location (i.e.,
1170 r0 for SImode, {r0, r1} for DImode), just as would be expected
1171 for an ordinary division routine. Because the AAPCS calling
1172 conventions specify that all of { r0, r1, r2, r3 } are
1173 callee-saved registers, there is no need to tell the compiler
1174 explicitly that those registers are clobbered by these
1176 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
1177 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
1179 /* For SImode division the ABI provides div-without-mod routines,
1180 which are faster. */
1181 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
1182 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
1184 /* We don't have mod libcalls. Fortunately gcc knows how to use the
1185 divmod libcalls instead. */
1186 set_optab_libfunc (smod_optab
, DImode
, NULL
);
1187 set_optab_libfunc (umod_optab
, DImode
, NULL
);
1188 set_optab_libfunc (smod_optab
, SImode
, NULL
);
1189 set_optab_libfunc (umod_optab
, SImode
, NULL
);
1191 /* Half-precision float operations. The compiler handles all operations
1192 with NULL libfuncs by converting the SFmode. */
1193 switch (arm_fp16_format
)
1195 case ARM_FP16_FORMAT_IEEE
:
1196 case ARM_FP16_FORMAT_ALTERNATIVE
:
1199 set_conv_libfunc (trunc_optab
, HFmode
, SFmode
,
1200 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1202 : "__gnu_f2h_alternative"));
1203 set_conv_libfunc (sext_optab
, SFmode
, HFmode
,
1204 (arm_fp16_format
== ARM_FP16_FORMAT_IEEE
1206 : "__gnu_h2f_alternative"));
1209 set_optab_libfunc (add_optab
, HFmode
, NULL
);
1210 set_optab_libfunc (sdiv_optab
, HFmode
, NULL
);
1211 set_optab_libfunc (smul_optab
, HFmode
, NULL
);
1212 set_optab_libfunc (neg_optab
, HFmode
, NULL
);
1213 set_optab_libfunc (sub_optab
, HFmode
, NULL
);
1216 set_optab_libfunc (eq_optab
, HFmode
, NULL
);
1217 set_optab_libfunc (ne_optab
, HFmode
, NULL
);
1218 set_optab_libfunc (lt_optab
, HFmode
, NULL
);
1219 set_optab_libfunc (le_optab
, HFmode
, NULL
);
1220 set_optab_libfunc (ge_optab
, HFmode
, NULL
);
1221 set_optab_libfunc (gt_optab
, HFmode
, NULL
);
1222 set_optab_libfunc (unord_optab
, HFmode
, NULL
);
1229 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
1231 const arm_fixed_mode_set fixed_arith_modes
[] =
1252 const arm_fixed_mode_set fixed_conv_modes
[] =
1282 for (i
= 0; i
< ARRAY_SIZE (fixed_arith_modes
); i
++)
1284 arm_set_fixed_optab_libfunc (add_optab
, fixed_arith_modes
[i
].mode
,
1285 "add", fixed_arith_modes
[i
].name
, 3);
1286 arm_set_fixed_optab_libfunc (ssadd_optab
, fixed_arith_modes
[i
].mode
,
1287 "ssadd", fixed_arith_modes
[i
].name
, 3);
1288 arm_set_fixed_optab_libfunc (usadd_optab
, fixed_arith_modes
[i
].mode
,
1289 "usadd", fixed_arith_modes
[i
].name
, 3);
1290 arm_set_fixed_optab_libfunc (sub_optab
, fixed_arith_modes
[i
].mode
,
1291 "sub", fixed_arith_modes
[i
].name
, 3);
1292 arm_set_fixed_optab_libfunc (sssub_optab
, fixed_arith_modes
[i
].mode
,
1293 "sssub", fixed_arith_modes
[i
].name
, 3);
1294 arm_set_fixed_optab_libfunc (ussub_optab
, fixed_arith_modes
[i
].mode
,
1295 "ussub", fixed_arith_modes
[i
].name
, 3);
1296 arm_set_fixed_optab_libfunc (smul_optab
, fixed_arith_modes
[i
].mode
,
1297 "mul", fixed_arith_modes
[i
].name
, 3);
1298 arm_set_fixed_optab_libfunc (ssmul_optab
, fixed_arith_modes
[i
].mode
,
1299 "ssmul", fixed_arith_modes
[i
].name
, 3);
1300 arm_set_fixed_optab_libfunc (usmul_optab
, fixed_arith_modes
[i
].mode
,
1301 "usmul", fixed_arith_modes
[i
].name
, 3);
1302 arm_set_fixed_optab_libfunc (sdiv_optab
, fixed_arith_modes
[i
].mode
,
1303 "div", fixed_arith_modes
[i
].name
, 3);
1304 arm_set_fixed_optab_libfunc (udiv_optab
, fixed_arith_modes
[i
].mode
,
1305 "udiv", fixed_arith_modes
[i
].name
, 3);
1306 arm_set_fixed_optab_libfunc (ssdiv_optab
, fixed_arith_modes
[i
].mode
,
1307 "ssdiv", fixed_arith_modes
[i
].name
, 3);
1308 arm_set_fixed_optab_libfunc (usdiv_optab
, fixed_arith_modes
[i
].mode
,
1309 "usdiv", fixed_arith_modes
[i
].name
, 3);
1310 arm_set_fixed_optab_libfunc (neg_optab
, fixed_arith_modes
[i
].mode
,
1311 "neg", fixed_arith_modes
[i
].name
, 2);
1312 arm_set_fixed_optab_libfunc (ssneg_optab
, fixed_arith_modes
[i
].mode
,
1313 "ssneg", fixed_arith_modes
[i
].name
, 2);
1314 arm_set_fixed_optab_libfunc (usneg_optab
, fixed_arith_modes
[i
].mode
,
1315 "usneg", fixed_arith_modes
[i
].name
, 2);
1316 arm_set_fixed_optab_libfunc (ashl_optab
, fixed_arith_modes
[i
].mode
,
1317 "ashl", fixed_arith_modes
[i
].name
, 3);
1318 arm_set_fixed_optab_libfunc (ashr_optab
, fixed_arith_modes
[i
].mode
,
1319 "ashr", fixed_arith_modes
[i
].name
, 3);
1320 arm_set_fixed_optab_libfunc (lshr_optab
, fixed_arith_modes
[i
].mode
,
1321 "lshr", fixed_arith_modes
[i
].name
, 3);
1322 arm_set_fixed_optab_libfunc (ssashl_optab
, fixed_arith_modes
[i
].mode
,
1323 "ssashl", fixed_arith_modes
[i
].name
, 3);
1324 arm_set_fixed_optab_libfunc (usashl_optab
, fixed_arith_modes
[i
].mode
,
1325 "usashl", fixed_arith_modes
[i
].name
, 3);
1326 arm_set_fixed_optab_libfunc (cmp_optab
, fixed_arith_modes
[i
].mode
,
1327 "cmp", fixed_arith_modes
[i
].name
, 2);
1330 for (i
= 0; i
< ARRAY_SIZE (fixed_conv_modes
); i
++)
1331 for (j
= 0; j
< ARRAY_SIZE (fixed_conv_modes
); j
++)
1334 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[i
].mode
)
1335 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes
[j
].mode
)))
1338 arm_set_fixed_conv_libfunc (fract_optab
, fixed_conv_modes
[i
].mode
,
1339 fixed_conv_modes
[j
].mode
, "fract",
1340 fixed_conv_modes
[i
].name
,
1341 fixed_conv_modes
[j
].name
);
1342 arm_set_fixed_conv_libfunc (satfract_optab
,
1343 fixed_conv_modes
[i
].mode
,
1344 fixed_conv_modes
[j
].mode
, "satfract",
1345 fixed_conv_modes
[i
].name
,
1346 fixed_conv_modes
[j
].name
);
1347 arm_set_fixed_conv_libfunc (fractuns_optab
,
1348 fixed_conv_modes
[i
].mode
,
1349 fixed_conv_modes
[j
].mode
, "fractuns",
1350 fixed_conv_modes
[i
].name
,
1351 fixed_conv_modes
[j
].name
);
1352 arm_set_fixed_conv_libfunc (satfractuns_optab
,
1353 fixed_conv_modes
[i
].mode
,
1354 fixed_conv_modes
[j
].mode
, "satfractuns",
1355 fixed_conv_modes
[i
].name
,
1356 fixed_conv_modes
[j
].name
);
1360 if (TARGET_AAPCS_BASED
)
1361 synchronize_libfunc
= init_one_libfunc ("__sync_synchronize");
1364 /* On AAPCS systems, this is the "struct __va_list". */
1365 static GTY(()) tree va_list_type
;
1367 /* Return the type to use as __builtin_va_list. */
1369 arm_build_builtin_va_list (void)
1374 if (!TARGET_AAPCS_BASED
)
1375 return std_build_builtin_va_list ();
1377 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
1385 The C Library ABI further reinforces this definition in \S
1388 We must follow this definition exactly. The structure tag
1389 name is visible in C++ mangled names, and thus forms a part
1390 of the ABI. The field name may be used by people who
1391 #include <stdarg.h>. */
1392 /* Create the type. */
1393 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
1394 /* Give it the required name. */
1395 va_list_name
= build_decl (BUILTINS_LOCATION
,
1397 get_identifier ("__va_list"),
1399 DECL_ARTIFICIAL (va_list_name
) = 1;
1400 TYPE_NAME (va_list_type
) = va_list_name
;
1401 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
1402 /* Create the __ap field. */
1403 ap_field
= build_decl (BUILTINS_LOCATION
,
1405 get_identifier ("__ap"),
1407 DECL_ARTIFICIAL (ap_field
) = 1;
1408 DECL_FIELD_CONTEXT (ap_field
) = va_list_type
;
1409 TYPE_FIELDS (va_list_type
) = ap_field
;
1410 /* Compute its layout. */
1411 layout_type (va_list_type
);
1413 return va_list_type
;
1416 /* Return an expression of type "void *" pointing to the next
1417 available argument in a variable-argument list. VALIST is the
1418 user-level va_list object, of type __builtin_va_list. */
1420 arm_extract_valist_ptr (tree valist
)
1422 if (TREE_TYPE (valist
) == error_mark_node
)
1423 return error_mark_node
;
1425 /* On an AAPCS target, the pointer is stored within "struct
1427 if (TARGET_AAPCS_BASED
)
1429 tree ap_field
= TYPE_FIELDS (TREE_TYPE (valist
));
1430 valist
= build3 (COMPONENT_REF
, TREE_TYPE (ap_field
),
1431 valist
, ap_field
, NULL_TREE
);
1437 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
1439 arm_expand_builtin_va_start (tree valist
, rtx nextarg
)
1441 valist
= arm_extract_valist_ptr (valist
);
1442 std_expand_builtin_va_start (valist
, nextarg
);
1445 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
1447 arm_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
1450 valist
= arm_extract_valist_ptr (valist
);
1451 return std_gimplify_va_arg_expr (valist
, type
, pre_p
, post_p
);
1454 /* Fix up any incompatible options that the user has specified. */
1456 arm_option_override (void)
1458 if (global_options_set
.x_arm_arch_option
)
1459 arm_selected_arch
= &all_architectures
[arm_arch_option
];
1461 if (global_options_set
.x_arm_cpu_option
)
1462 arm_selected_cpu
= &all_cores
[(int) arm_cpu_option
];
1464 if (global_options_set
.x_arm_tune_option
)
1465 arm_selected_tune
= &all_cores
[(int) arm_tune_option
];
1467 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1468 SUBTARGET_OVERRIDE_OPTIONS
;
1471 if (arm_selected_arch
)
1473 if (arm_selected_cpu
)
1475 /* Check for conflict between mcpu and march. */
1476 if ((arm_selected_cpu
->flags
^ arm_selected_arch
->flags
) & ~FL_TUNE
)
1478 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1479 arm_selected_cpu
->name
, arm_selected_arch
->name
);
1480 /* -march wins for code generation.
1481 -mcpu wins for default tuning. */
1482 if (!arm_selected_tune
)
1483 arm_selected_tune
= arm_selected_cpu
;
1485 arm_selected_cpu
= arm_selected_arch
;
1489 arm_selected_arch
= NULL
;
1492 /* Pick a CPU based on the architecture. */
1493 arm_selected_cpu
= arm_selected_arch
;
1496 /* If the user did not specify a processor, choose one for them. */
1497 if (!arm_selected_cpu
)
1499 const struct processors
* sel
;
1500 unsigned int sought
;
1502 arm_selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
];
1503 if (!arm_selected_cpu
->name
)
1505 #ifdef SUBTARGET_CPU_DEFAULT
1506 /* Use the subtarget default CPU if none was specified by
1508 arm_selected_cpu
= &all_cores
[SUBTARGET_CPU_DEFAULT
];
1510 /* Default to ARM6. */
1511 if (!arm_selected_cpu
->name
)
1512 arm_selected_cpu
= &all_cores
[arm6
];
1515 sel
= arm_selected_cpu
;
1516 insn_flags
= sel
->flags
;
1518 /* Now check to see if the user has specified some command line
1519 switch that require certain abilities from the cpu. */
1522 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1524 sought
|= (FL_THUMB
| FL_MODE32
);
1526 /* There are no ARM processors that support both APCS-26 and
1527 interworking. Therefore we force FL_MODE26 to be removed
1528 from insn_flags here (if it was set), so that the search
1529 below will always be able to find a compatible processor. */
1530 insn_flags
&= ~FL_MODE26
;
1533 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1535 /* Try to locate a CPU type that supports all of the abilities
1536 of the default CPU, plus the extra abilities requested by
1538 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1539 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1542 if (sel
->name
== NULL
)
1544 unsigned current_bit_count
= 0;
1545 const struct processors
* best_fit
= NULL
;
1547 /* Ideally we would like to issue an error message here
1548 saying that it was not possible to find a CPU compatible
1549 with the default CPU, but which also supports the command
1550 line options specified by the programmer, and so they
1551 ought to use the -mcpu=<name> command line option to
1552 override the default CPU type.
1554 If we cannot find a cpu that has both the
1555 characteristics of the default cpu and the given
1556 command line options we scan the array again looking
1557 for a best match. */
1558 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1559 if ((sel
->flags
& sought
) == sought
)
1563 count
= bit_count (sel
->flags
& insn_flags
);
1565 if (count
>= current_bit_count
)
1568 current_bit_count
= count
;
1572 gcc_assert (best_fit
);
1576 arm_selected_cpu
= sel
;
1580 gcc_assert (arm_selected_cpu
);
1581 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1582 if (!arm_selected_tune
)
1583 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1585 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1586 insn_flags
= arm_selected_cpu
->flags
;
1588 arm_tune
= arm_selected_tune
->core
;
1589 tune_flags
= arm_selected_tune
->flags
;
1590 current_tune
= arm_selected_tune
->tune
;
1592 /* Make sure that the processor choice does not conflict with any of the
1593 other command line choices. */
1594 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1595 error ("target CPU does not support ARM mode");
1597 /* BPABI targets use linker tricks to allow interworking on cores
1598 without thumb support. */
1599 if (TARGET_INTERWORK
&& !((insn_flags
& FL_THUMB
) || TARGET_BPABI
))
1601 warning (0, "target CPU does not support interworking" );
1602 target_flags
&= ~MASK_INTERWORK
;
1605 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1607 warning (0, "target CPU does not support THUMB instructions");
1608 target_flags
&= ~MASK_THUMB
;
1611 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1613 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1614 target_flags
&= ~MASK_APCS_FRAME
;
1617 /* Callee super interworking implies thumb interworking. Adding
1618 this to the flags here simplifies the logic elsewhere. */
1619 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1620 target_flags
|= MASK_INTERWORK
;
1622 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1623 from here where no function is being compiled currently. */
1624 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1625 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1627 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1628 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1630 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1632 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1633 target_flags
|= MASK_APCS_FRAME
;
1636 if (TARGET_POKE_FUNCTION_NAME
)
1637 target_flags
|= MASK_APCS_FRAME
;
1639 if (TARGET_APCS_REENT
&& flag_pic
)
1640 error ("-fpic and -mapcs-reent are incompatible");
1642 if (TARGET_APCS_REENT
)
1643 warning (0, "APCS reentrant code not supported. Ignored");
1645 /* If this target is normally configured to use APCS frames, warn if they
1646 are turned off and debugging is turned on. */
1648 && write_symbols
!= NO_DEBUG
1649 && !TARGET_APCS_FRAME
1650 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1651 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1653 if (TARGET_APCS_FLOAT
)
1654 warning (0, "passing floating point arguments in fp regs not yet supported");
1656 if (TARGET_LITTLE_WORDS
)
1657 warning (OPT_Wdeprecated
, "%<mwords-little-endian%> is deprecated and "
1658 "will be removed in a future release");
1660 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1661 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1662 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1663 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1664 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1665 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1666 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1667 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1668 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1669 arm_arch7
= (insn_flags
& FL_ARCH7
) != 0;
1670 arm_arch7em
= (insn_flags
& FL_ARCH7EM
) != 0;
1671 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1672 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1673 arm_arch_cirrus
= (insn_flags
& FL_CIRRUS
) != 0;
1675 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1676 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1677 thumb_code
= TARGET_ARM
== 0;
1678 thumb1_code
= TARGET_THUMB1
!= 0;
1679 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1680 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1681 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1682 arm_arch_thumb_hwdiv
= (insn_flags
& FL_THUMB_DIV
) != 0;
1683 arm_arch_arm_hwdiv
= (insn_flags
& FL_ARM_DIV
) != 0;
1684 arm_tune_cortex_a9
= (arm_tune
== cortexa9
) != 0;
1686 /* If we are not using the default (ARM mode) section anchor offset
1687 ranges, then set the correct ranges now. */
1690 /* Thumb-1 LDR instructions cannot have negative offsets.
1691 Permissible positive offset ranges are 5-bit (for byte loads),
1692 6-bit (for halfword loads), or 7-bit (for word loads).
1693 Empirical results suggest a 7-bit anchor range gives the best
1694 overall code size. */
1695 targetm
.min_anchor_offset
= 0;
1696 targetm
.max_anchor_offset
= 127;
1698 else if (TARGET_THUMB2
)
1700 /* The minimum is set such that the total size of the block
1701 for a particular anchor is 248 + 1 + 4095 bytes, which is
1702 divisible by eight, ensuring natural spacing of anchors. */
1703 targetm
.min_anchor_offset
= -248;
1704 targetm
.max_anchor_offset
= 4095;
1707 /* V5 code we generate is completely interworking capable, so we turn off
1708 TARGET_INTERWORK here to avoid many tests later on. */
1710 /* XXX However, we must pass the right pre-processor defines to CPP
1711 or GLD can get confused. This is a hack. */
1712 if (TARGET_INTERWORK
)
1713 arm_cpp_interwork
= 1;
1716 target_flags
&= ~MASK_INTERWORK
;
1718 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1719 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1721 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1722 error ("iwmmxt abi requires an iwmmxt capable cpu");
1724 if (!global_options_set
.x_arm_fpu_index
)
1726 const char *target_fpu_name
;
1729 #ifdef FPUTYPE_DEFAULT
1730 target_fpu_name
= FPUTYPE_DEFAULT
;
1732 if (arm_arch_cirrus
)
1733 target_fpu_name
= "maverick";
1735 target_fpu_name
= "fpe2";
1738 ok
= opt_enum_arg_to_value (OPT_mfpu_
, target_fpu_name
, &arm_fpu_index
,
1743 arm_fpu_desc
= &all_fpus
[arm_fpu_index
];
1745 switch (arm_fpu_desc
->model
)
1747 case ARM_FP_MODEL_FPA
:
1748 if (arm_fpu_desc
->rev
== 2)
1749 arm_fpu_attr
= FPU_FPE2
;
1750 else if (arm_fpu_desc
->rev
== 3)
1751 arm_fpu_attr
= FPU_FPE3
;
1753 arm_fpu_attr
= FPU_FPA
;
1756 case ARM_FP_MODEL_MAVERICK
:
1757 arm_fpu_attr
= FPU_MAVERICK
;
1760 case ARM_FP_MODEL_VFP
:
1761 arm_fpu_attr
= FPU_VFP
;
1768 if (TARGET_AAPCS_BASED
1769 && (arm_fpu_desc
->model
== ARM_FP_MODEL_FPA
))
1770 error ("FPA is unsupported in the AAPCS");
1772 if (TARGET_AAPCS_BASED
)
1774 if (TARGET_CALLER_INTERWORKING
)
1775 error ("AAPCS does not support -mcaller-super-interworking");
1777 if (TARGET_CALLEE_INTERWORKING
)
1778 error ("AAPCS does not support -mcallee-super-interworking");
1781 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1782 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1783 will ever exist. GCC makes no attempt to support this combination. */
1784 if (TARGET_IWMMXT
&& !TARGET_SOFT_FLOAT
)
1785 sorry ("iWMMXt and hardware floating point");
1787 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1788 if (TARGET_THUMB2
&& TARGET_IWMMXT
)
1789 sorry ("Thumb-2 iWMMXt");
1791 /* __fp16 support currently assumes the core has ldrh. */
1792 if (!arm_arch4
&& arm_fp16_format
!= ARM_FP16_FORMAT_NONE
)
1793 sorry ("__fp16 and no ldrh");
1795 /* If soft-float is specified then don't use FPU. */
1796 if (TARGET_SOFT_FLOAT
)
1797 arm_fpu_attr
= FPU_NONE
;
1799 if (TARGET_AAPCS_BASED
)
1801 if (arm_abi
== ARM_ABI_IWMMXT
)
1802 arm_pcs_default
= ARM_PCS_AAPCS_IWMMXT
;
1803 else if (arm_float_abi
== ARM_FLOAT_ABI_HARD
1804 && TARGET_HARD_FLOAT
1806 arm_pcs_default
= ARM_PCS_AAPCS_VFP
;
1808 arm_pcs_default
= ARM_PCS_AAPCS
;
1812 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1813 sorry ("-mfloat-abi=hard and VFP");
1815 if (arm_abi
== ARM_ABI_APCS
)
1816 arm_pcs_default
= ARM_PCS_APCS
;
1818 arm_pcs_default
= ARM_PCS_ATPCS
;
1821 /* For arm2/3 there is no need to do any scheduling if there is only
1822 a floating point emulator, or we are doing software floating-point. */
1823 if ((TARGET_SOFT_FLOAT
1824 || (TARGET_FPA
&& arm_fpu_desc
->rev
))
1825 && (tune_flags
& FL_MODE32
) == 0)
1826 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
1828 /* Use the cp15 method if it is available. */
1829 if (target_thread_pointer
== TP_AUTO
)
1831 if (arm_arch6k
&& !TARGET_THUMB1
)
1832 target_thread_pointer
= TP_CP15
;
1834 target_thread_pointer
= TP_SOFT
;
1837 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1838 error ("can not use -mtp=cp15 with 16-bit Thumb");
1840 /* Override the default structure alignment for AAPCS ABI. */
1841 if (!global_options_set
.x_arm_structure_size_boundary
)
1843 if (TARGET_AAPCS_BASED
)
1844 arm_structure_size_boundary
= 8;
1848 if (arm_structure_size_boundary
!= 8
1849 && arm_structure_size_boundary
!= 32
1850 && !(ARM_DOUBLEWORD_ALIGN
&& arm_structure_size_boundary
== 64))
1852 if (ARM_DOUBLEWORD_ALIGN
)
1854 "structure size boundary can only be set to 8, 32 or 64");
1856 warning (0, "structure size boundary can only be set to 8 or 32");
1857 arm_structure_size_boundary
1858 = (TARGET_AAPCS_BASED
? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY
);
1862 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1864 error ("RTP PIC is incompatible with Thumb");
1868 /* If stack checking is disabled, we can use r10 as the PIC register,
1869 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1870 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1872 if (TARGET_VXWORKS_RTP
)
1873 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1874 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1877 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1878 arm_pic_register
= 9;
1880 if (arm_pic_register_string
!= NULL
)
1882 int pic_register
= decode_reg_name (arm_pic_register_string
);
1885 warning (0, "-mpic-register= is useless without -fpic");
1887 /* Prevent the user from choosing an obviously stupid PIC register. */
1888 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1889 || pic_register
== HARD_FRAME_POINTER_REGNUM
1890 || pic_register
== STACK_POINTER_REGNUM
1891 || pic_register
>= PC_REGNUM
1892 || (TARGET_VXWORKS_RTP
1893 && (unsigned int) pic_register
!= arm_pic_register
))
1894 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1896 arm_pic_register
= pic_register
;
1899 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1900 if (fix_cm3_ldrd
== 2)
1902 if (arm_selected_cpu
->core
== cortexm3
)
1908 if (TARGET_THUMB1
&& flag_schedule_insns
)
1910 /* Don't warn since it's on by default in -O2. */
1911 flag_schedule_insns
= 0;
1916 /* If optimizing for size, bump the number of instructions that we
1917 are prepared to conditionally execute (even on a StrongARM). */
1918 max_insns_skipped
= 6;
1921 max_insns_skipped
= current_tune
->max_insns_skipped
;
1923 /* Hot/Cold partitioning is not currently supported, since we can't
1924 handle literal pool placement in that case. */
1925 if (flag_reorder_blocks_and_partition
)
1927 inform (input_location
,
1928 "-freorder-blocks-and-partition not supported on this architecture");
1929 flag_reorder_blocks_and_partition
= 0;
1930 flag_reorder_blocks
= 1;
1934 /* Hoisting PIC address calculations more aggressively provides a small,
1935 but measurable, size reduction for PIC code. Therefore, we decrease
1936 the bar for unrestricted expression hoisting to the cost of PIC address
1937 calculation, which is 2 instructions. */
1938 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST
, 2,
1939 global_options
.x_param_values
,
1940 global_options_set
.x_param_values
);
1942 /* ARM EABI defaults to strict volatile bitfields. */
1943 if (TARGET_AAPCS_BASED
&& flag_strict_volatile_bitfields
< 0)
1944 flag_strict_volatile_bitfields
= 1;
1946 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
1947 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
1948 if (flag_prefetch_loop_arrays
< 0
1951 && current_tune
->num_prefetch_slots
> 0)
1952 flag_prefetch_loop_arrays
= 1;
1954 /* Set up parameters to be used in prefetching algorithm. Do not override the
1955 defaults unless we are tuning for a core we have researched values for. */
1956 if (current_tune
->num_prefetch_slots
> 0)
1957 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES
,
1958 current_tune
->num_prefetch_slots
,
1959 global_options
.x_param_values
,
1960 global_options_set
.x_param_values
);
1961 if (current_tune
->l1_cache_line_size
>= 0)
1962 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE
,
1963 current_tune
->l1_cache_line_size
,
1964 global_options
.x_param_values
,
1965 global_options_set
.x_param_values
);
1966 if (current_tune
->l1_cache_size
>= 0)
1967 maybe_set_param_value (PARAM_L1_CACHE_SIZE
,
1968 current_tune
->l1_cache_size
,
1969 global_options
.x_param_values
,
1970 global_options_set
.x_param_values
);
1972 /* Register global variables with the garbage collector. */
1973 arm_add_gc_roots ();
1977 arm_add_gc_roots (void)
1979 gcc_obstack_init(&minipool_obstack
);
1980 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
1983 /* A table of known ARM exception types.
1984 For use with the interrupt function attribute. */
1988 const char *const arg
;
1989 const unsigned long return_value
;
1993 static const isr_attribute_arg isr_attribute_args
[] =
1995 { "IRQ", ARM_FT_ISR
},
1996 { "irq", ARM_FT_ISR
},
1997 { "FIQ", ARM_FT_FIQ
},
1998 { "fiq", ARM_FT_FIQ
},
1999 { "ABORT", ARM_FT_ISR
},
2000 { "abort", ARM_FT_ISR
},
2001 { "ABORT", ARM_FT_ISR
},
2002 { "abort", ARM_FT_ISR
},
2003 { "UNDEF", ARM_FT_EXCEPTION
},
2004 { "undef", ARM_FT_EXCEPTION
},
2005 { "SWI", ARM_FT_EXCEPTION
},
2006 { "swi", ARM_FT_EXCEPTION
},
2007 { NULL
, ARM_FT_NORMAL
}
2010 /* Returns the (interrupt) function type of the current
2011 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
2013 static unsigned long
2014 arm_isr_value (tree argument
)
2016 const isr_attribute_arg
* ptr
;
2020 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
2022 /* No argument - default to IRQ. */
2023 if (argument
== NULL_TREE
)
2026 /* Get the value of the argument. */
2027 if (TREE_VALUE (argument
) == NULL_TREE
2028 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
2029 return ARM_FT_UNKNOWN
;
2031 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
2033 /* Check it against the list of known arguments. */
2034 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
2035 if (streq (arg
, ptr
->arg
))
2036 return ptr
->return_value
;
2038 /* An unrecognized interrupt type. */
2039 return ARM_FT_UNKNOWN
;
2042 /* Computes the type of the current function. */
2044 static unsigned long
2045 arm_compute_func_type (void)
2047 unsigned long type
= ARM_FT_UNKNOWN
;
2051 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
2053 /* Decide if the current function is volatile. Such functions
2054 never return, and many memory cycles can be saved by not storing
2055 register values that will never be needed again. This optimization
2056 was added to speed up context switching in a kernel application. */
2058 && (TREE_NOTHROW (current_function_decl
)
2059 || !(flag_unwind_tables
2061 && arm_except_unwind_info (&global_options
) != UI_SJLJ
)))
2062 && TREE_THIS_VOLATILE (current_function_decl
))
2063 type
|= ARM_FT_VOLATILE
;
2065 if (cfun
->static_chain_decl
!= NULL
)
2066 type
|= ARM_FT_NESTED
;
2068 attr
= DECL_ATTRIBUTES (current_function_decl
);
2070 a
= lookup_attribute ("naked", attr
);
2072 type
|= ARM_FT_NAKED
;
2074 a
= lookup_attribute ("isr", attr
);
2076 a
= lookup_attribute ("interrupt", attr
);
2079 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
2081 type
|= arm_isr_value (TREE_VALUE (a
));
2086 /* Returns the type of the current function. */
2089 arm_current_func_type (void)
2091 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
2092 cfun
->machine
->func_type
= arm_compute_func_type ();
2094 return cfun
->machine
->func_type
;
2098 arm_allocate_stack_slots_for_args (void)
2100 /* Naked functions should not allocate stack slots for arguments. */
2101 return !IS_NAKED (arm_current_func_type ());
2105 /* Output assembler code for a block containing the constant parts
2106 of a trampoline, leaving space for the variable parts.
2108 On the ARM, (if r8 is the static chain regnum, and remembering that
2109 referencing pc adds an offset of 8) the trampoline looks like:
2112 .word static chain value
2113 .word function's address
2114 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
2117 arm_asm_trampoline_template (FILE *f
)
2121 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2122 asm_fprintf (f
, "\tldr\t%r, [%r, #0]\n", PC_REGNUM
, PC_REGNUM
);
2124 else if (TARGET_THUMB2
)
2126 /* The Thumb-2 trampoline is similar to the arm implementation.
2127 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
2128 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n",
2129 STATIC_CHAIN_REGNUM
, PC_REGNUM
);
2130 asm_fprintf (f
, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM
, PC_REGNUM
);
2134 ASM_OUTPUT_ALIGN (f
, 2);
2135 fprintf (f
, "\t.code\t16\n");
2136 fprintf (f
, ".Ltrampoline_start:\n");
2137 asm_fprintf (f
, "\tpush\t{r0, r1}\n");
2138 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2139 asm_fprintf (f
, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM
);
2140 asm_fprintf (f
, "\tldr\tr0, [%r, #8]\n", PC_REGNUM
);
2141 asm_fprintf (f
, "\tstr\tr0, [%r, #4]\n", SP_REGNUM
);
2142 asm_fprintf (f
, "\tpop\t{r0, %r}\n", PC_REGNUM
);
2144 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2145 assemble_aligned_integer (UNITS_PER_WORD
, const0_rtx
);
2148 /* Emit RTL insns to initialize the variable parts of a trampoline. */
2151 arm_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
2153 rtx fnaddr
, mem
, a_tramp
;
2155 emit_block_move (m_tramp
, assemble_trampoline_template (),
2156 GEN_INT (TRAMPOLINE_SIZE
), BLOCK_OP_NORMAL
);
2158 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 8 : 12);
2159 emit_move_insn (mem
, chain_value
);
2161 mem
= adjust_address (m_tramp
, SImode
, TARGET_32BIT
? 12 : 16);
2162 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
2163 emit_move_insn (mem
, fnaddr
);
2165 a_tramp
= XEXP (m_tramp
, 0);
2166 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
2167 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, Pmode
,
2168 plus_constant (a_tramp
, TRAMPOLINE_SIZE
), Pmode
);
2171 /* Thumb trampolines should be entered in thumb mode, so set
2172 the bottom bit of the address. */
2175 arm_trampoline_adjust_address (rtx addr
)
2178 addr
= expand_simple_binop (Pmode
, IOR
, addr
, const1_rtx
,
2179 NULL
, 0, OPTAB_LIB_WIDEN
);
2183 /* Return 1 if it is possible to return using a single instruction.
2184 If SIBLING is non-null, this is a test for a return before a sibling
2185 call. SIBLING is the call insn, so we can examine its register usage. */
2188 use_return_insn (int iscond
, rtx sibling
)
2191 unsigned int func_type
;
2192 unsigned long saved_int_regs
;
2193 unsigned HOST_WIDE_INT stack_adjust
;
2194 arm_stack_offsets
*offsets
;
2196 /* Never use a return instruction before reload has run. */
2197 if (!reload_completed
)
2200 func_type
= arm_current_func_type ();
2202 /* Naked, volatile and stack alignment functions need special
2204 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
2207 /* So do interrupt functions that use the frame pointer and Thumb
2208 interrupt functions. */
2209 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
2212 offsets
= arm_get_frame_offsets ();
2213 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
2215 /* As do variadic functions. */
2216 if (crtl
->args
.pretend_args_size
2217 || cfun
->machine
->uses_anonymous_args
2218 /* Or if the function calls __builtin_eh_return () */
2219 || crtl
->calls_eh_return
2220 /* Or if the function calls alloca */
2221 || cfun
->calls_alloca
2222 /* Or if there is a stack adjustment. However, if the stack pointer
2223 is saved on the stack, we can use a pre-incrementing stack load. */
2224 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
2225 && stack_adjust
== 4)))
2228 saved_int_regs
= offsets
->saved_regs_mask
;
2230 /* Unfortunately, the insn
2232 ldmib sp, {..., sp, ...}
2234 triggers a bug on most SA-110 based devices, such that the stack
2235 pointer won't be correctly restored if the instruction takes a
2236 page fault. We work around this problem by popping r3 along with
2237 the other registers, since that is never slower than executing
2238 another instruction.
2240 We test for !arm_arch5 here, because code for any architecture
2241 less than this could potentially be run on one of the buggy
2243 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
2245 /* Validate that r3 is a call-clobbered register (always true in
2246 the default abi) ... */
2247 if (!call_used_regs
[3])
2250 /* ... that it isn't being used for a return value ... */
2251 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
2254 /* ... or for a tail-call argument ... */
2257 gcc_assert (GET_CODE (sibling
) == CALL_INSN
);
2259 if (find_regno_fusage (sibling
, USE
, 3))
2263 /* ... and that there are no call-saved registers in r0-r2
2264 (always true in the default ABI). */
2265 if (saved_int_regs
& 0x7)
2269 /* Can't be done if interworking with Thumb, and any registers have been
2271 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
2274 /* On StrongARM, conditional returns are expensive if they aren't
2275 taken and multiple registers have been stacked. */
2276 if (iscond
&& arm_tune_strongarm
)
2278 /* Conditional return when just the LR is stored is a simple
2279 conditional-load instruction, that's not expensive. */
2280 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
2284 && arm_pic_register
!= INVALID_REGNUM
2285 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
2289 /* If there are saved registers but the LR isn't saved, then we need
2290 two instructions for the return. */
2291 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
2294 /* Can't be done if any of the FPA regs are pushed,
2295 since this also requires an insn. */
2296 if (TARGET_HARD_FLOAT
&& TARGET_FPA
)
2297 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
2298 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2301 /* Likewise VFP regs. */
2302 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
2303 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
2304 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
2307 if (TARGET_REALLY_IWMMXT
)
2308 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
2309 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
2315 /* Return TRUE if int I is a valid immediate ARM constant. */
2318 const_ok_for_arm (HOST_WIDE_INT i
)
2322 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
2323 be all zero, or all one. */
2324 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
2325 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
2326 != ((~(unsigned HOST_WIDE_INT
) 0)
2327 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
2330 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
2332 /* Fast return for 0 and small values. We must do this for zero, since
2333 the code below can't handle that one case. */
2334 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
2337 /* Get the number of trailing zeros. */
2338 lowbit
= ffs((int) i
) - 1;
2340 /* Only even shifts are allowed in ARM mode so round down to the
2341 nearest even number. */
2345 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
2350 /* Allow rotated constants in ARM mode. */
2352 && ((i
& ~0xc000003f) == 0
2353 || (i
& ~0xf000000f) == 0
2354 || (i
& ~0xfc000003) == 0))
2361 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2364 if (i
== v
|| i
== (v
| (v
<< 8)))
2367 /* Allow repeated pattern 0xXY00XY00. */
2377 /* Return true if I is a valid constant for the operation CODE. */
2379 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
2381 if (const_ok_for_arm (i
))
2387 /* See if we can use movw. */
2388 if (arm_arch_thumb2
&& (i
& 0xffff0000) == 0)
2391 /* Otherwise, try mvn. */
2392 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2413 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
2415 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
2421 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2425 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
2432 /* Emit a sequence of insns to handle a large constant.
2433 CODE is the code of the operation required, it can be any of SET, PLUS,
2434 IOR, AND, XOR, MINUS;
2435 MODE is the mode in which the operation is being performed;
2436 VAL is the integer to operate on;
2437 SOURCE is the other operand (a register, or a null-pointer for SET);
2438 SUBTARGETS means it is safe to create scratch registers if that will
2439 either produce a simpler sequence, or we will want to cse the values.
2440 Return value is the number of insns emitted. */
2442 /* ??? Tweak this for thumb2. */
2444 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
2445 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
2449 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
2450 cond
= COND_EXEC_TEST (PATTERN (insn
));
2454 if (subtargets
|| code
== SET
2455 || (GET_CODE (target
) == REG
&& GET_CODE (source
) == REG
2456 && REGNO (target
) != REGNO (source
)))
2458 /* After arm_reorg has been called, we can't fix up expensive
2459 constants by pushing them into memory so we must synthesize
2460 them in-line, regardless of the cost. This is only likely to
2461 be more costly on chips that have load delay slots and we are
2462 compiling without running the scheduler (so no splitting
2463 occurred before the final instruction emission).
2465 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
2467 if (!after_arm_reorg
2469 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
2471 > (arm_constant_limit (optimize_function_for_size_p (cfun
))
2476 /* Currently SET is the only monadic value for CODE, all
2477 the rest are diadic. */
2478 if (TARGET_USE_MOVT
)
2479 arm_emit_movpair (target
, GEN_INT (val
));
2481 emit_set_insn (target
, GEN_INT (val
));
2487 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
2489 if (TARGET_USE_MOVT
)
2490 arm_emit_movpair (temp
, GEN_INT (val
));
2492 emit_set_insn (temp
, GEN_INT (val
));
2494 /* For MINUS, the value is subtracted from, since we never
2495 have subtraction of a constant. */
2497 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
2499 emit_set_insn (target
,
2500 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
2506 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
2510 /* Return the number of instructions required to synthesize the given
2511 constant, if we start emitting them from bit-position I. */
2513 count_insns_for_constant (HOST_WIDE_INT remainder
, int i
)
2515 HOST_WIDE_INT temp1
;
2516 int step_size
= TARGET_ARM
? 2 : 1;
2519 gcc_assert (TARGET_ARM
|| i
== 0);
2527 if (remainder
& (((1 << step_size
) - 1) << (i
- step_size
)))
2532 temp1
= remainder
& ((0x0ff << end
)
2533 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
2534 remainder
&= ~temp1
;
2539 } while (remainder
);
2544 find_best_start (unsigned HOST_WIDE_INT remainder
)
2546 int best_consecutive_zeros
= 0;
2550 /* If we aren't targetting ARM, the best place to start is always at
2555 for (i
= 0; i
< 32; i
+= 2)
2557 int consecutive_zeros
= 0;
2559 if (!(remainder
& (3 << i
)))
2561 while ((i
< 32) && !(remainder
& (3 << i
)))
2563 consecutive_zeros
+= 2;
2566 if (consecutive_zeros
> best_consecutive_zeros
)
2568 best_consecutive_zeros
= consecutive_zeros
;
2569 best_start
= i
- consecutive_zeros
;
2575 /* So long as it won't require any more insns to do so, it's
2576 desirable to emit a small constant (in bits 0...9) in the last
2577 insn. This way there is more chance that it can be combined with
2578 a later addressing insn to form a pre-indexed load or store
2579 operation. Consider:
2581 *((volatile int *)0xe0000100) = 1;
2582 *((volatile int *)0xe0000110) = 2;
2584 We want this to wind up as:
2588 str rB, [rA, #0x100]
2590 str rB, [rA, #0x110]
2592 rather than having to synthesize both large constants from scratch.
2594 Therefore, we calculate how many insns would be required to emit
2595 the constant starting from `best_start', and also starting from
2596 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2597 yield a shorter sequence, we may as well use zero. */
2599 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < remainder
)
2600 && (count_insns_for_constant (remainder
, 0) <=
2601 count_insns_for_constant (remainder
, best_start
)))
2607 /* Emit an instruction with the indicated PATTERN. If COND is
2608 non-NULL, conditionalize the execution of the instruction on COND
2612 emit_constant_insn (rtx cond
, rtx pattern
)
2615 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
2616 emit_insn (pattern
);
2619 /* As above, but extra parameter GENERATE which, if clear, suppresses
2621 /* ??? This needs more work for thumb2. */
2624 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2625 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2630 int final_invert
= 0;
2632 int num_bits_set
= 0;
2633 int set_sign_bit_copies
= 0;
2634 int clear_sign_bit_copies
= 0;
2635 int clear_zero_bit_copies
= 0;
2636 int set_zero_bit_copies
= 0;
2638 unsigned HOST_WIDE_INT temp1
, temp2
;
2639 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2640 int step_size
= TARGET_ARM
? 2 : 1;
2642 /* Find out which operations are safe for a given CODE. Also do a quick
2643 check for degenerate cases; these can occur when DImode operations
2657 if (remainder
== 0xffffffff)
2660 emit_constant_insn (cond
,
2661 gen_rtx_SET (VOIDmode
, target
,
2662 GEN_INT (ARM_SIGN_EXTEND (val
))));
2668 if (reload_completed
&& rtx_equal_p (target
, source
))
2672 emit_constant_insn (cond
,
2673 gen_rtx_SET (VOIDmode
, target
, source
));
2682 emit_constant_insn (cond
,
2683 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2686 if (remainder
== 0xffffffff)
2688 if (reload_completed
&& rtx_equal_p (target
, source
))
2691 emit_constant_insn (cond
,
2692 gen_rtx_SET (VOIDmode
, target
, source
));
2701 if (reload_completed
&& rtx_equal_p (target
, source
))
2704 emit_constant_insn (cond
,
2705 gen_rtx_SET (VOIDmode
, target
, source
));
2709 if (remainder
== 0xffffffff)
2712 emit_constant_insn (cond
,
2713 gen_rtx_SET (VOIDmode
, target
,
2714 gen_rtx_NOT (mode
, source
)));
2720 /* We treat MINUS as (val - source), since (source - val) is always
2721 passed as (source + (-val)). */
2725 emit_constant_insn (cond
,
2726 gen_rtx_SET (VOIDmode
, target
,
2727 gen_rtx_NEG (mode
, source
)));
2730 if (const_ok_for_arm (val
))
2733 emit_constant_insn (cond
,
2734 gen_rtx_SET (VOIDmode
, target
,
2735 gen_rtx_MINUS (mode
, GEN_INT (val
),
2747 /* If we can do it in one insn get out quickly. */
2748 if (const_ok_for_op (val
, code
))
2751 emit_constant_insn (cond
,
2752 gen_rtx_SET (VOIDmode
, target
,
2754 ? gen_rtx_fmt_ee (code
, mode
, source
,
2760 /* Calculate a few attributes that may be useful for specific
2762 /* Count number of leading zeros. */
2763 for (i
= 31; i
>= 0; i
--)
2765 if ((remainder
& (1 << i
)) == 0)
2766 clear_sign_bit_copies
++;
2771 /* Count number of leading 1's. */
2772 for (i
= 31; i
>= 0; i
--)
2774 if ((remainder
& (1 << i
)) != 0)
2775 set_sign_bit_copies
++;
2780 /* Count number of trailing zero's. */
2781 for (i
= 0; i
<= 31; i
++)
2783 if ((remainder
& (1 << i
)) == 0)
2784 clear_zero_bit_copies
++;
2789 /* Count number of trailing 1's. */
2790 for (i
= 0; i
<= 31; i
++)
2792 if ((remainder
& (1 << i
)) != 0)
2793 set_zero_bit_copies
++;
2801 /* See if we can do this by sign_extending a constant that is known
2802 to be negative. This is a good, way of doing it, since the shift
2803 may well merge into a subsequent insn. */
2804 if (set_sign_bit_copies
> 1)
2806 if (const_ok_for_arm
2807 (temp1
= ARM_SIGN_EXTEND (remainder
2808 << (set_sign_bit_copies
- 1))))
2812 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2813 emit_constant_insn (cond
,
2814 gen_rtx_SET (VOIDmode
, new_src
,
2816 emit_constant_insn (cond
,
2817 gen_ashrsi3 (target
, new_src
,
2818 GEN_INT (set_sign_bit_copies
- 1)));
2822 /* For an inverted constant, we will need to set the low bits,
2823 these will be shifted out of harm's way. */
2824 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2825 if (const_ok_for_arm (~temp1
))
2829 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2830 emit_constant_insn (cond
,
2831 gen_rtx_SET (VOIDmode
, new_src
,
2833 emit_constant_insn (cond
,
2834 gen_ashrsi3 (target
, new_src
,
2835 GEN_INT (set_sign_bit_copies
- 1)));
2841 /* See if we can calculate the value as the difference between two
2842 valid immediates. */
2843 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2845 int topshift
= clear_sign_bit_copies
& ~1;
2847 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2848 & (0xff000000 >> topshift
));
2850 /* If temp1 is zero, then that means the 9 most significant
2851 bits of remainder were 1 and we've caused it to overflow.
2852 When topshift is 0 we don't need to do anything since we
2853 can borrow from 'bit 32'. */
2854 if (temp1
== 0 && topshift
!= 0)
2855 temp1
= 0x80000000 >> (topshift
- 1);
2857 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2859 if (const_ok_for_arm (temp2
))
2863 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2864 emit_constant_insn (cond
,
2865 gen_rtx_SET (VOIDmode
, new_src
,
2867 emit_constant_insn (cond
,
2868 gen_addsi3 (target
, new_src
,
2876 /* See if we can generate this by setting the bottom (or the top)
2877 16 bits, and then shifting these into the other half of the
2878 word. We only look for the simplest cases, to do more would cost
2879 too much. Be careful, however, not to generate this when the
2880 alternative would take fewer insns. */
2881 if (val
& 0xffff0000)
2883 temp1
= remainder
& 0xffff0000;
2884 temp2
= remainder
& 0x0000ffff;
2886 /* Overlaps outside this range are best done using other methods. */
2887 for (i
= 9; i
< 24; i
++)
2889 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2890 && !const_ok_for_arm (temp2
))
2892 rtx new_src
= (subtargets
2893 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2895 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2896 source
, subtargets
, generate
);
2904 gen_rtx_ASHIFT (mode
, source
,
2911 /* Don't duplicate cases already considered. */
2912 for (i
= 17; i
< 24; i
++)
2914 if (((temp1
| (temp1
>> i
)) == remainder
)
2915 && !const_ok_for_arm (temp1
))
2917 rtx new_src
= (subtargets
2918 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2920 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2921 source
, subtargets
, generate
);
2926 gen_rtx_SET (VOIDmode
, target
,
2929 gen_rtx_LSHIFTRT (mode
, source
,
2940 /* If we have IOR or XOR, and the constant can be loaded in a
2941 single instruction, and we can find a temporary to put it in,
2942 then this can be done in two instructions instead of 3-4. */
2944 /* TARGET can't be NULL if SUBTARGETS is 0 */
2945 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2947 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2951 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2953 emit_constant_insn (cond
,
2954 gen_rtx_SET (VOIDmode
, sub
,
2956 emit_constant_insn (cond
,
2957 gen_rtx_SET (VOIDmode
, target
,
2958 gen_rtx_fmt_ee (code
, mode
,
2969 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2970 and the remainder 0s for e.g. 0xfff00000)
2971 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2973 This can be done in 2 instructions by using shifts with mov or mvn.
2978 mvn r0, r0, lsr #12 */
2979 if (set_sign_bit_copies
> 8
2980 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2984 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2985 rtx shift
= GEN_INT (set_sign_bit_copies
);
2989 gen_rtx_SET (VOIDmode
, sub
,
2991 gen_rtx_ASHIFT (mode
,
2996 gen_rtx_SET (VOIDmode
, target
,
2998 gen_rtx_LSHIFTRT (mode
, sub
,
3005 x = y | constant (which has set_zero_bit_copies number of trailing ones).
3007 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
3009 For eg. r0 = r0 | 0xfff
3014 if (set_zero_bit_copies
> 8
3015 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
3019 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3020 rtx shift
= GEN_INT (set_zero_bit_copies
);
3024 gen_rtx_SET (VOIDmode
, sub
,
3026 gen_rtx_LSHIFTRT (mode
,
3031 gen_rtx_SET (VOIDmode
, target
,
3033 gen_rtx_ASHIFT (mode
, sub
,
3039 /* This will never be reached for Thumb2 because orn is a valid
3040 instruction. This is for Thumb1 and the ARM 32 bit cases.
3042 x = y | constant (such that ~constant is a valid constant)
3044 x = ~(~y & ~constant).
3046 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
3050 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
3051 emit_constant_insn (cond
,
3052 gen_rtx_SET (VOIDmode
, sub
,
3053 gen_rtx_NOT (mode
, source
)));
3056 sub
= gen_reg_rtx (mode
);
3057 emit_constant_insn (cond
,
3058 gen_rtx_SET (VOIDmode
, sub
,
3059 gen_rtx_AND (mode
, source
,
3061 emit_constant_insn (cond
,
3062 gen_rtx_SET (VOIDmode
, target
,
3063 gen_rtx_NOT (mode
, sub
)));
3070 /* See if two shifts will do 2 or more insn's worth of work. */
3071 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
3073 HOST_WIDE_INT shift_mask
= ((0xffffffff
3074 << (32 - clear_sign_bit_copies
))
3077 if ((remainder
| shift_mask
) != 0xffffffff)
3081 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3082 insns
= arm_gen_constant (AND
, mode
, cond
,
3083 remainder
| shift_mask
,
3084 new_src
, source
, subtargets
, 1);
3089 rtx targ
= subtargets
? NULL_RTX
: target
;
3090 insns
= arm_gen_constant (AND
, mode
, cond
,
3091 remainder
| shift_mask
,
3092 targ
, source
, subtargets
, 0);
3098 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3099 rtx shift
= GEN_INT (clear_sign_bit_copies
);
3101 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
3102 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3108 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3110 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3112 if ((remainder
| shift_mask
) != 0xffffffff)
3116 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3118 insns
= arm_gen_constant (AND
, mode
, cond
,
3119 remainder
| shift_mask
,
3120 new_src
, source
, subtargets
, 1);
3125 rtx targ
= subtargets
? NULL_RTX
: target
;
3127 insns
= arm_gen_constant (AND
, mode
, cond
,
3128 remainder
| shift_mask
,
3129 targ
, source
, subtargets
, 0);
3135 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3136 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3138 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3139 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3151 for (i
= 0; i
< 32; i
++)
3152 if (remainder
& (1 << i
))
3155 if ((code
== AND
) || (can_invert
&& num_bits_set
> 16))
3156 remainder
^= 0xffffffff;
3157 else if (code
== PLUS
&& num_bits_set
> 16)
3158 remainder
= (-remainder
) & 0xffffffff;
3160 /* For XOR, if more than half the bits are set and there's a sequence
3161 of more than 8 consecutive ones in the pattern then we can XOR by the
3162 inverted constant and then invert the final result; this may save an
3163 instruction and might also lead to the final mvn being merged with
3164 some other operation. */
3165 else if (code
== XOR
&& num_bits_set
> 16
3166 && (count_insns_for_constant (remainder
^ 0xffffffff,
3168 (remainder
^ 0xffffffff))
3169 < count_insns_for_constant (remainder
,
3170 find_best_start (remainder
))))
3172 remainder
^= 0xffffffff;
3181 /* Now try and find a way of doing the job in either two or three
3183 We start by looking for the largest block of zeros that are aligned on
3184 a 2-bit boundary, we then fill up the temps, wrapping around to the
3185 top of the word when we drop off the bottom.
3186 In the worst case this code should produce no more than four insns.
3187 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3188 best place to start. */
3190 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3193 /* Now start emitting the insns. */
3194 i
= find_best_start (remainder
);
3201 if (remainder
& (3 << (i
- 2)))
3206 temp1
= remainder
& ((0x0ff << end
)
3207 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
3208 remainder
&= ~temp1
;
3212 rtx new_src
, temp1_rtx
;
3214 if (code
== SET
|| code
== MINUS
)
3216 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3217 if (can_invert
&& code
!= MINUS
)
3222 if ((final_invert
|| remainder
) && subtargets
)
3223 new_src
= gen_reg_rtx (mode
);
3228 else if (can_negate
)
3232 temp1
= trunc_int_for_mode (temp1
, mode
);
3233 temp1_rtx
= GEN_INT (temp1
);
3237 else if (code
== MINUS
)
3238 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3240 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3242 emit_constant_insn (cond
,
3243 gen_rtx_SET (VOIDmode
, new_src
,
3253 else if (code
== MINUS
)
3259 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3269 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3270 gen_rtx_NOT (mode
, source
)));
3277 /* Canonicalize a comparison so that we are more likely to recognize it.
3278 This can be done for a few constant compares, where we can make the
3279 immediate value easier to load. */
3282 arm_canonicalize_comparison (enum rtx_code code
, rtx
*op0
, rtx
*op1
)
3284 enum machine_mode mode
;
3285 unsigned HOST_WIDE_INT i
, maxval
;
3287 mode
= GET_MODE (*op0
);
3288 if (mode
== VOIDmode
)
3289 mode
= GET_MODE (*op1
);
3291 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
3293 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3294 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3295 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3296 for GTU/LEU in Thumb mode. */
3301 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3303 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
3306 if (code
== GT
|| code
== LE
3307 || (!TARGET_ARM
&& (code
== GTU
|| code
== LEU
)))
3309 /* Missing comparison. First try to use an available
3311 if (GET_CODE (*op1
) == CONST_INT
)
3319 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3321 *op1
= GEN_INT (i
+ 1);
3322 return code
== GT
? GE
: LT
;
3327 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3328 && arm_const_double_by_immediates (GEN_INT (i
+ 1)))
3330 *op1
= GEN_INT (i
+ 1);
3331 return code
== GTU
? GEU
: LTU
;
3339 /* If that did not work, reverse the condition. */
3343 return swap_condition (code
);
3349 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
3350 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
3351 to facilitate possible combining with a cmp into 'ands'. */
3353 && GET_CODE (*op0
) == ZERO_EXTEND
3354 && GET_CODE (XEXP (*op0
, 0)) == SUBREG
3355 && GET_MODE (XEXP (*op0
, 0)) == QImode
3356 && GET_MODE (SUBREG_REG (XEXP (*op0
, 0))) == SImode
3357 && subreg_lowpart_p (XEXP (*op0
, 0))
3358 && *op1
== const0_rtx
)
3359 *op0
= gen_rtx_AND (SImode
, SUBREG_REG (XEXP (*op0
, 0)),
3362 /* Comparisons smaller than DImode. Only adjust comparisons against
3363 an out-of-range constant. */
3364 if (GET_CODE (*op1
) != CONST_INT
3365 || const_ok_for_arm (INTVAL (*op1
))
3366 || const_ok_for_arm (- INTVAL (*op1
)))
3380 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3382 *op1
= GEN_INT (i
+ 1);
3383 return code
== GT
? GE
: LT
;
3390 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3392 *op1
= GEN_INT (i
- 1);
3393 return code
== GE
? GT
: LE
;
3399 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
3400 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
3402 *op1
= GEN_INT (i
+ 1);
3403 return code
== GTU
? GEU
: LTU
;
3410 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
3412 *op1
= GEN_INT (i
- 1);
3413 return code
== GEU
? GTU
: LEU
;
3425 /* Define how to find the value returned by a function. */
3428 arm_function_value(const_tree type
, const_tree func
,
3429 bool outgoing ATTRIBUTE_UNUSED
)
3431 enum machine_mode mode
;
3432 int unsignedp ATTRIBUTE_UNUSED
;
3433 rtx r ATTRIBUTE_UNUSED
;
3435 mode
= TYPE_MODE (type
);
3437 if (TARGET_AAPCS_BASED
)
3438 return aapcs_allocate_return_reg (mode
, type
, func
);
3440 /* Promote integer types. */
3441 if (INTEGRAL_TYPE_P (type
))
3442 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
3444 /* Promotes small structs returned in a register to full-word size
3445 for big-endian AAPCS. */
3446 if (arm_return_in_msb (type
))
3448 HOST_WIDE_INT size
= int_size_in_bytes (type
);
3449 if (size
% UNITS_PER_WORD
!= 0)
3451 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
3452 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
3456 return LIBCALL_VALUE (mode
);
3460 libcall_eq (const void *p1
, const void *p2
)
3462 return rtx_equal_p ((const_rtx
) p1
, (const_rtx
) p2
);
3466 libcall_hash (const void *p1
)
3468 return hash_rtx ((const_rtx
) p1
, VOIDmode
, NULL
, NULL
, FALSE
);
3472 add_libcall (htab_t htab
, rtx libcall
)
3474 *htab_find_slot (htab
, libcall
, INSERT
) = libcall
;
3478 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3480 static bool init_done
= false;
3481 static htab_t libcall_htab
;
3487 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3489 add_libcall (libcall_htab
,
3490 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3491 add_libcall (libcall_htab
,
3492 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3493 add_libcall (libcall_htab
,
3494 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3495 add_libcall (libcall_htab
,
3496 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3498 add_libcall (libcall_htab
,
3499 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3500 add_libcall (libcall_htab
,
3501 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3502 add_libcall (libcall_htab
,
3503 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3504 add_libcall (libcall_htab
,
3505 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3507 add_libcall (libcall_htab
,
3508 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3509 add_libcall (libcall_htab
,
3510 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3511 add_libcall (libcall_htab
,
3512 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3513 add_libcall (libcall_htab
,
3514 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3515 add_libcall (libcall_htab
,
3516 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3517 add_libcall (libcall_htab
,
3518 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3520 /* Values from double-precision helper functions are returned in core
3521 registers if the selected core only supports single-precision
3522 arithmetic, even if we are using the hard-float ABI. The same is
3523 true for single-precision helpers, but we will never be using the
3524 hard-float ABI on a CPU which doesn't support single-precision
3525 operations in hardware. */
3526 add_libcall (libcall_htab
, optab_libfunc (add_optab
, DFmode
));
3527 add_libcall (libcall_htab
, optab_libfunc (sdiv_optab
, DFmode
));
3528 add_libcall (libcall_htab
, optab_libfunc (smul_optab
, DFmode
));
3529 add_libcall (libcall_htab
, optab_libfunc (neg_optab
, DFmode
));
3530 add_libcall (libcall_htab
, optab_libfunc (sub_optab
, DFmode
));
3531 add_libcall (libcall_htab
, optab_libfunc (eq_optab
, DFmode
));
3532 add_libcall (libcall_htab
, optab_libfunc (lt_optab
, DFmode
));
3533 add_libcall (libcall_htab
, optab_libfunc (le_optab
, DFmode
));
3534 add_libcall (libcall_htab
, optab_libfunc (ge_optab
, DFmode
));
3535 add_libcall (libcall_htab
, optab_libfunc (gt_optab
, DFmode
));
3536 add_libcall (libcall_htab
, optab_libfunc (unord_optab
, DFmode
));
3537 add_libcall (libcall_htab
, convert_optab_libfunc (sext_optab
, DFmode
,
3539 add_libcall (libcall_htab
, convert_optab_libfunc (trunc_optab
, SFmode
,
3543 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
3547 arm_libcall_value (enum machine_mode mode
, const_rtx libcall
)
3549 if (TARGET_AAPCS_BASED
&& arm_pcs_default
!= ARM_PCS_AAPCS
3550 && GET_MODE_CLASS (mode
) == MODE_FLOAT
)
3552 /* The following libcalls return their result in integer registers,
3553 even though they return a floating point value. */
3554 if (arm_libcall_uses_aapcs_base (libcall
))
3555 return gen_rtx_REG (mode
, ARG_REGISTER(1));
3559 return LIBCALL_VALUE (mode
);
3562 /* Determine the amount of memory needed to store the possible return
3563 registers of an untyped call. */
3565 arm_apply_result_size (void)
3571 if (TARGET_HARD_FLOAT_ABI
)
3577 if (TARGET_MAVERICK
)
3580 if (TARGET_IWMMXT_ABI
)
3587 /* Decide whether TYPE should be returned in memory (true)
3588 or in a register (false). FNTYPE is the type of the function making
3591 arm_return_in_memory (const_tree type
, const_tree fntype
)
3595 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3597 if (TARGET_AAPCS_BASED
)
3599 /* Simple, non-aggregate types (ie not including vectors and
3600 complex) are always returned in a register (or registers).
3601 We don't care about which register here, so we can short-cut
3602 some of the detail. */
3603 if (!AGGREGATE_TYPE_P (type
)
3604 && TREE_CODE (type
) != VECTOR_TYPE
3605 && TREE_CODE (type
) != COMPLEX_TYPE
)
3608 /* Any return value that is no larger than one word can be
3610 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3613 /* Check any available co-processors to see if they accept the
3614 type as a register candidate (VFP, for example, can return
3615 some aggregates in consecutive registers). These aren't
3616 available if the call is variadic. */
3617 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3620 /* Vector values should be returned using ARM registers, not
3621 memory (unless they're over 16 bytes, which will break since
3622 we only have four call-clobbered registers to play with). */
3623 if (TREE_CODE (type
) == VECTOR_TYPE
)
3624 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3626 /* The rest go in memory. */
3630 if (TREE_CODE (type
) == VECTOR_TYPE
)
3631 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3633 if (!AGGREGATE_TYPE_P (type
) &&
3634 (TREE_CODE (type
) != VECTOR_TYPE
))
3635 /* All simple types are returned in registers. */
3638 if (arm_abi
!= ARM_ABI_APCS
)
3640 /* ATPCS and later return aggregate types in memory only if they are
3641 larger than a word (or are variable size). */
3642 return (size
< 0 || size
> UNITS_PER_WORD
);
3645 /* For the arm-wince targets we choose to be compatible with Microsoft's
3646 ARM and Thumb compilers, which always return aggregates in memory. */
3648 /* All structures/unions bigger than one word are returned in memory.
3649 Also catch the case where int_size_in_bytes returns -1. In this case
3650 the aggregate is either huge or of variable size, and in either case
3651 we will want to return it via memory and not in a register. */
3652 if (size
< 0 || size
> UNITS_PER_WORD
)
3655 if (TREE_CODE (type
) == RECORD_TYPE
)
3659 /* For a struct the APCS says that we only return in a register
3660 if the type is 'integer like' and every addressable element
3661 has an offset of zero. For practical purposes this means
3662 that the structure can have at most one non bit-field element
3663 and that this element must be the first one in the structure. */
3665 /* Find the first field, ignoring non FIELD_DECL things which will
3666 have been created by C++. */
3667 for (field
= TYPE_FIELDS (type
);
3668 field
&& TREE_CODE (field
) != FIELD_DECL
;
3669 field
= DECL_CHAIN (field
))
3673 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3675 /* Check that the first field is valid for returning in a register. */
3677 /* ... Floats are not allowed */
3678 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3681 /* ... Aggregates that are not themselves valid for returning in
3682 a register are not allowed. */
3683 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3686 /* Now check the remaining fields, if any. Only bitfields are allowed,
3687 since they are not addressable. */
3688 for (field
= DECL_CHAIN (field
);
3690 field
= DECL_CHAIN (field
))
3692 if (TREE_CODE (field
) != FIELD_DECL
)
3695 if (!DECL_BIT_FIELD_TYPE (field
))
3702 if (TREE_CODE (type
) == UNION_TYPE
)
3706 /* Unions can be returned in registers if every element is
3707 integral, or can be returned in an integer register. */
3708 for (field
= TYPE_FIELDS (type
);
3710 field
= DECL_CHAIN (field
))
3712 if (TREE_CODE (field
) != FIELD_DECL
)
3715 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3718 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3724 #endif /* not ARM_WINCE */
3726 /* Return all other types in memory. */
3730 /* Indicate whether or not words of a double are in big-endian order. */
3733 arm_float_words_big_endian (void)
3735 if (TARGET_MAVERICK
)
3738 /* For FPA, float words are always big-endian. For VFP, floats words
3739 follow the memory system mode. */
3747 return (TARGET_BIG_END
? 1 : 0);
3752 const struct pcs_attribute_arg
3756 } pcs_attribute_args
[] =
3758 {"aapcs", ARM_PCS_AAPCS
},
3759 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
3761 /* We could recognize these, but changes would be needed elsewhere
3762 * to implement them. */
3763 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
3764 {"atpcs", ARM_PCS_ATPCS
},
3765 {"apcs", ARM_PCS_APCS
},
3767 {NULL
, ARM_PCS_UNKNOWN
}
3771 arm_pcs_from_attribute (tree attr
)
3773 const struct pcs_attribute_arg
*ptr
;
3776 /* Get the value of the argument. */
3777 if (TREE_VALUE (attr
) == NULL_TREE
3778 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
3779 return ARM_PCS_UNKNOWN
;
3781 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
3783 /* Check it against the list of known arguments. */
3784 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3785 if (streq (arg
, ptr
->arg
))
3788 /* An unrecognized interrupt type. */
3789 return ARM_PCS_UNKNOWN
;
3792 /* Get the PCS variant to use for this call. TYPE is the function's type
3793 specification, DECL is the specific declartion. DECL may be null if
3794 the call could be indirect or if this is a library call. */
3796 arm_get_pcs_model (const_tree type
, const_tree decl
)
3798 bool user_convention
= false;
3799 enum arm_pcs user_pcs
= arm_pcs_default
;
3804 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
3807 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
3808 user_convention
= true;
3811 if (TARGET_AAPCS_BASED
)
3813 /* Detect varargs functions. These always use the base rules
3814 (no argument is ever a candidate for a co-processor
3816 bool base_rules
= stdarg_p (type
);
3818 if (user_convention
)
3820 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
3821 sorry ("non-AAPCS derived PCS variant");
3822 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
3823 error ("variadic functions must use the base AAPCS variant");
3827 return ARM_PCS_AAPCS
;
3828 else if (user_convention
)
3830 else if (decl
&& flag_unit_at_a_time
)
3832 /* Local functions never leak outside this compilation unit,
3833 so we are free to use whatever conventions are
3835 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3836 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3838 return ARM_PCS_AAPCS_LOCAL
;
3841 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
3842 sorry ("PCS variant");
3844 /* For everything else we use the target's default. */
3845 return arm_pcs_default
;
3850 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3851 const_tree fntype ATTRIBUTE_UNUSED
,
3852 rtx libcall ATTRIBUTE_UNUSED
,
3853 const_tree fndecl ATTRIBUTE_UNUSED
)
3855 /* Record the unallocated VFP registers. */
3856 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
3857 pcum
->aapcs_vfp_reg_alloc
= 0;
3860 /* Walk down the type tree of TYPE counting consecutive base elements.
3861 If *MODEP is VOIDmode, then set it to the first valid floating point
3862 type. If a non-floating point type is found, or if a floating point
3863 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3864 otherwise return the count in the sub-tree. */
3866 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
3868 enum machine_mode mode
;
3871 switch (TREE_CODE (type
))
3874 mode
= TYPE_MODE (type
);
3875 if (mode
!= DFmode
&& mode
!= SFmode
)
3878 if (*modep
== VOIDmode
)
3887 mode
= TYPE_MODE (TREE_TYPE (type
));
3888 if (mode
!= DFmode
&& mode
!= SFmode
)
3891 if (*modep
== VOIDmode
)
3900 /* Use V2SImode and V4SImode as representatives of all 64-bit
3901 and 128-bit vector types, whether or not those modes are
3902 supported with the present options. */
3903 size
= int_size_in_bytes (type
);
3916 if (*modep
== VOIDmode
)
3919 /* Vector modes are considered to be opaque: two vectors are
3920 equivalent for the purposes of being homogeneous aggregates
3921 if they are the same size. */
3930 tree index
= TYPE_DOMAIN (type
);
3932 /* Can't handle incomplete types. */
3933 if (!COMPLETE_TYPE_P(type
))
3936 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
3939 || !TYPE_MAX_VALUE (index
)
3940 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
3941 || !TYPE_MIN_VALUE (index
)
3942 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
3946 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
3947 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
3949 /* There must be no padding. */
3950 if (!host_integerp (TYPE_SIZE (type
), 1)
3951 || (tree_low_cst (TYPE_SIZE (type
), 1)
3952 != count
* GET_MODE_BITSIZE (*modep
)))
3964 /* Can't handle incomplete types. */
3965 if (!COMPLETE_TYPE_P(type
))
3968 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
3970 if (TREE_CODE (field
) != FIELD_DECL
)
3973 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3979 /* There must be no padding. */
3980 if (!host_integerp (TYPE_SIZE (type
), 1)
3981 || (tree_low_cst (TYPE_SIZE (type
), 1)
3982 != count
* GET_MODE_BITSIZE (*modep
)))
3989 case QUAL_UNION_TYPE
:
3991 /* These aren't very interesting except in a degenerate case. */
3996 /* Can't handle incomplete types. */
3997 if (!COMPLETE_TYPE_P(type
))
4000 for (field
= TYPE_FIELDS (type
); field
; field
= DECL_CHAIN (field
))
4002 if (TREE_CODE (field
) != FIELD_DECL
)
4005 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
4008 count
= count
> sub_count
? count
: sub_count
;
4011 /* There must be no padding. */
4012 if (!host_integerp (TYPE_SIZE (type
), 1)
4013 || (tree_low_cst (TYPE_SIZE (type
), 1)
4014 != count
* GET_MODE_BITSIZE (*modep
)))
4027 /* Return true if PCS_VARIANT should use VFP registers. */
4029 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
4031 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
4033 static bool seen_thumb1_vfp
= false;
4035 if (TARGET_THUMB1
&& !seen_thumb1_vfp
)
4037 sorry ("Thumb-1 hard-float VFP ABI");
4038 /* sorry() is not immediately fatal, so only display this once. */
4039 seen_thumb1_vfp
= true;
4045 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
4048 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
4049 (TARGET_VFP_DOUBLE
|| !is_double
));
4053 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
4054 enum machine_mode mode
, const_tree type
,
4055 enum machine_mode
*base_mode
, int *count
)
4057 enum machine_mode new_mode
= VOIDmode
;
4059 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
4060 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
4061 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
4066 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
4069 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
4071 else if (type
&& (mode
== BLKmode
|| TREE_CODE (type
) == VECTOR_TYPE
))
4073 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
4075 if (ag_count
> 0 && ag_count
<= 4)
4084 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
4087 *base_mode
= new_mode
;
4092 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
4093 enum machine_mode mode
, const_tree type
)
4095 int count ATTRIBUTE_UNUSED
;
4096 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
4098 if (!use_vfp_abi (pcs_variant
, false))
4100 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4105 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4108 if (!use_vfp_abi (pcum
->pcs_variant
, false))
4111 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
4112 &pcum
->aapcs_vfp_rmode
,
4113 &pcum
->aapcs_vfp_rcount
);
4117 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4118 const_tree type ATTRIBUTE_UNUSED
)
4120 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
4121 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
4124 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
4125 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
4127 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
4128 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4131 int rcount
= pcum
->aapcs_vfp_rcount
;
4133 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
4137 /* Avoid using unsupported vector modes. */
4138 if (rmode
== V2SImode
)
4140 else if (rmode
== V4SImode
)
4147 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
4148 for (i
= 0; i
< rcount
; i
++)
4150 rtx tmp
= gen_rtx_REG (rmode
,
4151 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
4152 tmp
= gen_rtx_EXPR_LIST
4154 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
4155 XVECEXP (par
, 0, i
) = tmp
;
4158 pcum
->aapcs_reg
= par
;
4161 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
4168 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
4169 enum machine_mode mode
,
4170 const_tree type ATTRIBUTE_UNUSED
)
4172 if (!use_vfp_abi (pcs_variant
, false))
4175 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
4178 enum machine_mode ag_mode
;
4183 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4188 if (ag_mode
== V2SImode
)
4190 else if (ag_mode
== V4SImode
)
4196 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4197 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4198 for (i
= 0; i
< count
; i
++)
4200 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4201 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4202 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4203 XVECEXP (par
, 0, i
) = tmp
;
4209 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4213 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4214 enum machine_mode mode ATTRIBUTE_UNUSED
,
4215 const_tree type ATTRIBUTE_UNUSED
)
4217 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4218 pcum
->aapcs_vfp_reg_alloc
= 0;
4222 #define AAPCS_CP(X) \
4224 aapcs_ ## X ## _cum_init, \
4225 aapcs_ ## X ## _is_call_candidate, \
4226 aapcs_ ## X ## _allocate, \
4227 aapcs_ ## X ## _is_return_candidate, \
4228 aapcs_ ## X ## _allocate_return_reg, \
4229 aapcs_ ## X ## _advance \
4232 /* Table of co-processors that can be used to pass arguments in
4233 registers. Idealy no arugment should be a candidate for more than
4234 one co-processor table entry, but the table is processed in order
4235 and stops after the first match. If that entry then fails to put
4236 the argument into a co-processor register, the argument will go on
4240 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4241 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4243 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4244 BLKmode) is a candidate for this co-processor's registers; this
4245 function should ignore any position-dependent state in
4246 CUMULATIVE_ARGS and only use call-type dependent information. */
4247 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4249 /* Return true if the argument does get a co-processor register; it
4250 should set aapcs_reg to an RTX of the register allocated as is
4251 required for a return from FUNCTION_ARG. */
4252 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4254 /* Return true if a result of mode MODE (or type TYPE if MODE is
4255 BLKmode) is can be returned in this co-processor's registers. */
4256 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4258 /* Allocate and return an RTX element to hold the return type of a
4259 call, this routine must not fail and will only be called if
4260 is_return_candidate returned true with the same parameters. */
4261 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4263 /* Finish processing this argument and prepare to start processing
4265 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4266 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4274 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4279 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4280 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4287 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4289 /* We aren't passed a decl, so we can't check that a call is local.
4290 However, it isn't clear that that would be a win anyway, since it
4291 might limit some tail-calling opportunities. */
4292 enum arm_pcs pcs_variant
;
4296 const_tree fndecl
= NULL_TREE
;
4298 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4301 fntype
= TREE_TYPE (fntype
);
4304 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4307 pcs_variant
= arm_pcs_default
;
4309 if (pcs_variant
!= ARM_PCS_AAPCS
)
4313 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4314 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4323 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4326 /* We aren't passed a decl, so we can't check that a call is local.
4327 However, it isn't clear that that would be a win anyway, since it
4328 might limit some tail-calling opportunities. */
4329 enum arm_pcs pcs_variant
;
4330 int unsignedp ATTRIBUTE_UNUSED
;
4334 const_tree fndecl
= NULL_TREE
;
4336 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4339 fntype
= TREE_TYPE (fntype
);
4342 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4345 pcs_variant
= arm_pcs_default
;
4347 /* Promote integer types. */
4348 if (type
&& INTEGRAL_TYPE_P (type
))
4349 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4351 if (pcs_variant
!= ARM_PCS_AAPCS
)
4355 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4356 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4358 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4362 /* Promotes small structs returned in a register to full-word size
4363 for big-endian AAPCS. */
4364 if (type
&& arm_return_in_msb (type
))
4366 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4367 if (size
% UNITS_PER_WORD
!= 0)
4369 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4370 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4374 return gen_rtx_REG (mode
, R0_REGNUM
);
4378 aapcs_libcall_value (enum machine_mode mode
)
4380 if (BYTES_BIG_ENDIAN
&& ALL_FIXED_POINT_MODE_P (mode
)
4381 && GET_MODE_SIZE (mode
) <= 4)
4384 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4387 /* Lay out a function argument using the AAPCS rules. The rule
4388 numbers referred to here are those in the AAPCS. */
4390 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4391 const_tree type
, bool named
)
4396 /* We only need to do this once per argument. */
4397 if (pcum
->aapcs_arg_processed
)
4400 pcum
->aapcs_arg_processed
= true;
4402 /* Special case: if named is false then we are handling an incoming
4403 anonymous argument which is on the stack. */
4407 /* Is this a potential co-processor register candidate? */
4408 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4410 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4411 pcum
->aapcs_cprc_slot
= slot
;
4413 /* We don't have to apply any of the rules from part B of the
4414 preparation phase, these are handled elsewhere in the
4419 /* A Co-processor register candidate goes either in its own
4420 class of registers or on the stack. */
4421 if (!pcum
->aapcs_cprc_failed
[slot
])
4423 /* C1.cp - Try to allocate the argument to co-processor
4425 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4428 /* C2.cp - Put the argument on the stack and note that we
4429 can't assign any more candidates in this slot. We also
4430 need to note that we have allocated stack space, so that
4431 we won't later try to split a non-cprc candidate between
4432 core registers and the stack. */
4433 pcum
->aapcs_cprc_failed
[slot
] = true;
4434 pcum
->can_split
= false;
4437 /* We didn't get a register, so this argument goes on the
4439 gcc_assert (pcum
->can_split
== false);
4444 /* C3 - For double-word aligned arguments, round the NCRN up to the
4445 next even number. */
4446 ncrn
= pcum
->aapcs_ncrn
;
4447 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4450 nregs
= ARM_NUM_REGS2(mode
, type
);
4452 /* Sigh, this test should really assert that nregs > 0, but a GCC
4453 extension allows empty structs and then gives them empty size; it
4454 then allows such a structure to be passed by value. For some of
4455 the code below we have to pretend that such an argument has
4456 non-zero size so that we 'locate' it correctly either in
4457 registers or on the stack. */
4458 gcc_assert (nregs
>= 0);
4460 nregs2
= nregs
? nregs
: 1;
4462 /* C4 - Argument fits entirely in core registers. */
4463 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4465 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4466 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4470 /* C5 - Some core registers left and there are no arguments already
4471 on the stack: split this argument between the remaining core
4472 registers and the stack. */
4473 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4475 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4476 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4477 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4481 /* C6 - NCRN is set to 4. */
4482 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4484 /* C7,C8 - arugment goes on the stack. We have nothing to do here. */
4488 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4489 for a call to a function whose data type is FNTYPE.
4490 For a library call, FNTYPE is NULL. */
4492 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4494 tree fndecl ATTRIBUTE_UNUSED
)
4496 /* Long call handling. */
4498 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4500 pcum
->pcs_variant
= arm_pcs_default
;
4502 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4504 if (arm_libcall_uses_aapcs_base (libname
))
4505 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4507 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4508 pcum
->aapcs_reg
= NULL_RTX
;
4509 pcum
->aapcs_partial
= 0;
4510 pcum
->aapcs_arg_processed
= false;
4511 pcum
->aapcs_cprc_slot
= -1;
4512 pcum
->can_split
= true;
4514 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4518 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4520 pcum
->aapcs_cprc_failed
[i
] = false;
4521 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4529 /* On the ARM, the offset starts at 0. */
4531 pcum
->iwmmxt_nregs
= 0;
4532 pcum
->can_split
= true;
4534 /* Varargs vectors are treated the same as long long.
4535 named_count avoids having to change the way arm handles 'named' */
4536 pcum
->named_count
= 0;
4539 if (TARGET_REALLY_IWMMXT
&& fntype
)
4543 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4545 fn_arg
= TREE_CHAIN (fn_arg
))
4546 pcum
->named_count
+= 1;
4548 if (! pcum
->named_count
)
4549 pcum
->named_count
= INT_MAX
;
4554 /* Return true if mode/type need doubleword alignment. */
4556 arm_needs_doubleword_align (enum machine_mode mode
, const_tree type
)
4558 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
4559 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
4563 /* Determine where to put an argument to a function.
4564 Value is zero to push the argument on the stack,
4565 or a hard register in which to store the argument.
4567 MODE is the argument's machine mode.
4568 TYPE is the data type of the argument (as a tree).
4569 This is null for libcalls where that information may
4571 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4572 the preceding args and about the function being called.
4573 NAMED is nonzero if this argument is a named parameter
4574 (otherwise it is an extra parameter matching an ellipsis).
4576 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4577 other arguments are passed on the stack. If (NAMED == 0) (which happens
4578 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4579 defined), say it is passed in the stack (function_prologue will
4580 indeed make it pass in the stack if necessary). */
4583 arm_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
4584 const_tree type
, bool named
)
4586 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4589 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4590 a call insn (op3 of a call_value insn). */
4591 if (mode
== VOIDmode
)
4594 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4596 aapcs_layout_arg (pcum
, mode
, type
, named
);
4597 return pcum
->aapcs_reg
;
4600 /* Varargs vectors are treated the same as long long.
4601 named_count avoids having to change the way arm handles 'named' */
4602 if (TARGET_IWMMXT_ABI
4603 && arm_vector_mode_supported_p (mode
)
4604 && pcum
->named_count
> pcum
->nargs
+ 1)
4606 if (pcum
->iwmmxt_nregs
<= 9)
4607 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4610 pcum
->can_split
= false;
4615 /* Put doubleword aligned quantities in even register pairs. */
4617 && ARM_DOUBLEWORD_ALIGN
4618 && arm_needs_doubleword_align (mode
, type
))
4621 /* Only allow splitting an arg between regs and memory if all preceding
4622 args were allocated to regs. For args passed by reference we only count
4623 the reference pointer. */
4624 if (pcum
->can_split
)
4627 nregs
= ARM_NUM_REGS2 (mode
, type
);
4629 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4632 return gen_rtx_REG (mode
, pcum
->nregs
);
4636 arm_function_arg_boundary (enum machine_mode mode
, const_tree type
)
4638 return (ARM_DOUBLEWORD_ALIGN
&& arm_needs_doubleword_align (mode
, type
)
4639 ? DOUBLEWORD_ALIGNMENT
4644 arm_arg_partial_bytes (cumulative_args_t pcum_v
, enum machine_mode mode
,
4645 tree type
, bool named
)
4647 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4648 int nregs
= pcum
->nregs
;
4650 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4652 aapcs_layout_arg (pcum
, mode
, type
, named
);
4653 return pcum
->aapcs_partial
;
4656 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4659 if (NUM_ARG_REGS
> nregs
4660 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4662 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
4667 /* Update the data in PCUM to advance over an argument
4668 of mode MODE and data type TYPE.
4669 (TYPE is null for libcalls where that information may not be available.) */
4672 arm_function_arg_advance (cumulative_args_t pcum_v
, enum machine_mode mode
,
4673 const_tree type
, bool named
)
4675 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
4677 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4679 aapcs_layout_arg (pcum
, mode
, type
, named
);
4681 if (pcum
->aapcs_cprc_slot
>= 0)
4683 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4685 pcum
->aapcs_cprc_slot
= -1;
4688 /* Generic stuff. */
4689 pcum
->aapcs_arg_processed
= false;
4690 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4691 pcum
->aapcs_reg
= NULL_RTX
;
4692 pcum
->aapcs_partial
= 0;
4697 if (arm_vector_mode_supported_p (mode
)
4698 && pcum
->named_count
> pcum
->nargs
4699 && TARGET_IWMMXT_ABI
)
4700 pcum
->iwmmxt_nregs
+= 1;
4702 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
4706 /* Variable sized types are passed by reference. This is a GCC
4707 extension to the ARM ABI. */
4710 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
4711 enum machine_mode mode ATTRIBUTE_UNUSED
,
4712 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4714 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

/* Tracks which of the pragmas above is currently active.  */
static arm_pragma_enum arm_pragma_long_calls = OFF;
4728 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4730 arm_pragma_long_calls
= LONG
;
4734 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4736 arm_pragma_long_calls
= SHORT
;
4740 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
4742 arm_pragma_long_calls
= OFF
;
4745 /* Handle an attribute requiring a FUNCTION_DECL;
4746 arguments as in struct attribute_spec.handler. */
4748 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4749 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4751 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4753 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4755 *no_add_attrs
= true;
4761 /* Handle an "interrupt" or "isr" attribute;
4762 arguments as in struct attribute_spec.handler. */
4764 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
4769 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4771 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4773 *no_add_attrs
= true;
4775 /* FIXME: the argument if any is checked for type attributes;
4776 should it be checked for decl ones? */
4780 if (TREE_CODE (*node
) == FUNCTION_TYPE
4781 || TREE_CODE (*node
) == METHOD_TYPE
)
4783 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
4785 warning (OPT_Wattributes
, "%qE attribute ignored",
4787 *no_add_attrs
= true;
4790 else if (TREE_CODE (*node
) == POINTER_TYPE
4791 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
4792 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
4793 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
4795 *node
= build_variant_type_copy (*node
);
4796 TREE_TYPE (*node
) = build_type_attribute_variant
4798 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
4799 *no_add_attrs
= true;
4803 /* Possibly pass this attribute on from the type to a decl. */
4804 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
4805 | (int) ATTR_FLAG_FUNCTION_NEXT
4806 | (int) ATTR_FLAG_ARRAY_NEXT
))
4808 *no_add_attrs
= true;
4809 return tree_cons (name
, args
, NULL_TREE
);
4813 warning (OPT_Wattributes
, "%qE attribute ignored",
4822 /* Handle a "pcs" attribute; arguments as in struct
4823 attribute_spec.handler. */
4825 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
4826 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4828 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
4830 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
4831 *no_add_attrs
= true;
4836 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4837 /* Handle the "notshared" attribute. This attribute is another way of
4838 requesting hidden visibility. ARM's compiler supports
4839 "__declspec(notshared)"; we support the same thing via an
4843 arm_handle_notshared_attribute (tree
*node
,
4844 tree name ATTRIBUTE_UNUSED
,
4845 tree args ATTRIBUTE_UNUSED
,
4846 int flags ATTRIBUTE_UNUSED
,
4849 tree decl
= TYPE_NAME (*node
);
4853 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
4854 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
4855 *no_add_attrs
= false;
4861 /* Return 0 if the attributes for two types are incompatible, 1 if they
4862 are compatible, and 2 if they are nearly compatible (which causes a
4863 warning to be generated). */
4865 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
4869 /* Check for mismatch of non-default calling convention. */
4870 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
4873 /* Check for mismatched call attributes. */
4874 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4875 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4876 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4877 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4879 /* Only bother to check if an attribute is defined. */
4880 if (l1
| l2
| s1
| s2
)
4882 /* If one type has an attribute, the other must have the same attribute. */
4883 if ((l1
!= l2
) || (s1
!= s2
))
4886 /* Disallow mixed attributes. */
4887 if ((l1
& s2
) || (l2
& s1
))
4891 /* Check for mismatched ISR attribute. */
4892 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
4894 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
4895 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
4897 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
4904 /* Assigns default attributes to newly defined type. This is used to
4905 set short_call/long_call attributes for function types of
4906 functions defined inside corresponding #pragma scopes. */
4908 arm_set_default_type_attributes (tree type
)
4910 /* Add __attribute__ ((long_call)) to all functions, when
4911 inside #pragma long_calls or __attribute__ ((short_call)),
4912 when inside #pragma no_long_calls. */
4913 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
4915 tree type_attr_list
, attr_name
;
4916 type_attr_list
= TYPE_ATTRIBUTES (type
);
4918 if (arm_pragma_long_calls
== LONG
)
4919 attr_name
= get_identifier ("long_call");
4920 else if (arm_pragma_long_calls
== SHORT
)
4921 attr_name
= get_identifier ("short_call");
4925 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
4926 TYPE_ATTRIBUTES (type
) = type_attr_list
;
4930 /* Return true if DECL is known to be linked into section SECTION. */
4933 arm_function_in_section_p (tree decl
, section
*section
)
4935 /* We can only be certain about functions defined in the same
4936 compilation unit. */
4937 if (!TREE_STATIC (decl
))
4940 /* Make sure that SYMBOL always binds to the definition in this
4941 compilation unit. */
4942 if (!targetm
.binds_local_p (decl
))
4945 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4946 if (!DECL_SECTION_NAME (decl
))
4948 /* Make sure that we will not create a unique section for DECL. */
4949 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
4953 return function_section (decl
) == section
;
4956 /* Return nonzero if a 32-bit "long_call" should be generated for
4957 a call from the current function to DECL. We generate a long_call
4960 a. has an __attribute__((long call))
4961 or b. is within the scope of a #pragma long_calls
4962 or c. the -mlong-calls command line switch has been specified
4964 However we do not generate a long call if the function:
4966 d. has an __attribute__ ((short_call))
4967 or e. is inside the scope of a #pragma no_long_calls
4968 or f. is defined in the same section as the current function. */
4971 arm_is_long_call_p (tree decl
)
4976 return TARGET_LONG_CALLS
;
4978 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
4979 if (lookup_attribute ("short_call", attrs
))
4982 /* For "f", be conservative, and only cater for cases in which the
4983 whole of the current function is placed in the same section. */
4984 if (!flag_reorder_blocks_and_partition
4985 && TREE_CODE (decl
) == FUNCTION_DECL
4986 && arm_function_in_section_p (decl
, current_function_section ()))
4989 if (lookup_attribute ("long_call", attrs
))
4992 return TARGET_LONG_CALLS
;
4995 /* Return nonzero if it is ok to make a tail-call to DECL. */
4997 arm_function_ok_for_sibcall (tree decl
, tree exp
)
4999 unsigned long func_type
;
5001 if (cfun
->machine
->sibcall_blocked
)
5004 /* Never tailcall something for which we have no decl, or if we
5005 are generating code for Thumb-1. */
5006 if (decl
== NULL
|| TARGET_THUMB1
)
5009 /* The PIC register is live on entry to VxWorks PLT entries, so we
5010 must make the call before restoring the PIC register. */
5011 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
5014 /* Cannot tail-call to long calls, since these are out of range of
5015 a branch instruction. */
5016 if (arm_is_long_call_p (decl
))
5019 /* If we are interworking and the function is not declared static
5020 then we can't tail-call it unless we know that it exists in this
5021 compilation unit (since it might be a Thumb routine). */
5022 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
5025 func_type
= arm_current_func_type ();
5026 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
5027 if (IS_INTERRUPT (func_type
))
5030 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
5032 /* Check that the return value locations are the same. For
5033 example that we aren't returning a value from the sibling in
5034 a VFP register but then need to transfer it to a core
5038 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
5039 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
5041 if (!rtx_equal_p (a
, b
))
5045 /* Never tailcall if function may be called with a misaligned SP. */
5046 if (IS_STACKALIGN (func_type
))
5049 /* Everything else is ok. */
5054 /* Addressing mode support functions. */
5056 /* Return nonzero if X is a legitimate immediate operand when compiling
5057 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
5059 legitimate_pic_operand_p (rtx x
)
5061 if (GET_CODE (x
) == SYMBOL_REF
5062 || (GET_CODE (x
) == CONST
5063 && GET_CODE (XEXP (x
, 0)) == PLUS
5064 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5070 /* Record that the current function needs a PIC register. Initialize
5071 cfun->machine->pic_reg if we have not already done so. */
5074 require_pic_register (void)
5076 /* A lot of the logic here is made obscure by the fact that this
5077 routine gets called as part of the rtx cost estimation process.
5078 We don't want those calls to affect any assumptions about the real
5079 function; and further, we can't call entry_of_function() until we
5080 start the real expansion process. */
5081 if (!crtl
->uses_pic_offset_table
)
5083 gcc_assert (can_create_pseudo_p ());
5084 if (arm_pic_register
!= INVALID_REGNUM
)
5086 if (!cfun
->machine
->pic_reg
)
5087 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
5089 /* Play games to avoid marking the function as needing pic
5090 if we are being called as part of the cost-estimation
5092 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5093 crtl
->uses_pic_offset_table
= 1;
5099 if (!cfun
->machine
->pic_reg
)
5100 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
5102 /* Play games to avoid marking the function as needing pic
5103 if we are being called as part of the cost-estimation
5105 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
5107 crtl
->uses_pic_offset_table
= 1;
5110 arm_load_pic_register (0UL);
5115 for (insn
= seq
; insn
; insn
= NEXT_INSN (insn
))
5117 INSN_LOCATOR (insn
) = prologue_locator
;
5119 /* We can be called during expansion of PHI nodes, where
5120 we can't yet emit instructions directly in the final
5121 insn stream. Queue the insns on the entry edge, they will
5122 be committed after everything else is expanded. */
5123 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
5130 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
5132 if (GET_CODE (orig
) == SYMBOL_REF
5133 || GET_CODE (orig
) == LABEL_REF
)
5139 gcc_assert (can_create_pseudo_p ());
5140 reg
= gen_reg_rtx (Pmode
);
5143 /* VxWorks does not impose a fixed gap between segments; the run-time
5144 gap can be different from the object-file gap. We therefore can't
5145 use GOTOFF unless we are absolutely sure that the symbol is in the
5146 same segment as the GOT. Unfortunately, the flexibility of linker
5147 scripts means that we can't be sure of that in general, so assume
5148 that GOTOFF is never valid on VxWorks. */
5149 if ((GET_CODE (orig
) == LABEL_REF
5150 || (GET_CODE (orig
) == SYMBOL_REF
&&
5151 SYMBOL_REF_LOCAL_P (orig
)))
5153 && !TARGET_VXWORKS_RTP
)
5154 insn
= arm_pic_static_addr (orig
, reg
);
5160 /* If this function doesn't have a pic register, create one now. */
5161 require_pic_register ();
5163 pat
= gen_calculate_pic_address (reg
, cfun
->machine
->pic_reg
, orig
);
5165 /* Make the MEM as close to a constant as possible. */
5166 mem
= SET_SRC (pat
);
5167 gcc_assert (MEM_P (mem
) && !MEM_VOLATILE_P (mem
));
5168 MEM_READONLY_P (mem
) = 1;
5169 MEM_NOTRAP_P (mem
) = 1;
5171 insn
= emit_insn (pat
);
5174 /* Put a REG_EQUAL note on this insn, so that it can be optimized
5176 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
5180 else if (GET_CODE (orig
) == CONST
)
5184 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5185 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
5188 /* Handle the case where we have: const (UNSPEC_TLS). */
5189 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
5190 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
5193 /* Handle the case where we have:
5194 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
5196 if (GET_CODE (XEXP (orig
, 0)) == PLUS
5197 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
5198 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
5200 gcc_assert (GET_CODE (XEXP (XEXP (orig
, 0), 1)) == CONST_INT
);
5206 gcc_assert (can_create_pseudo_p ());
5207 reg
= gen_reg_rtx (Pmode
);
5210 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
5212 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5213 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5214 base
== reg
? 0 : reg
);
5216 if (GET_CODE (offset
) == CONST_INT
)
5218 /* The base register doesn't really matter, we only want to
5219 test the index for the appropriate mode. */
5220 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5222 gcc_assert (can_create_pseudo_p ());
5223 offset
= force_reg (Pmode
, offset
);
5226 if (GET_CODE (offset
) == CONST_INT
)
5227 return plus_constant (base
, INTVAL (offset
));
5230 if (GET_MODE_SIZE (mode
) > 4
5231 && (GET_MODE_CLASS (mode
) == MODE_INT
5232 || TARGET_SOFT_FLOAT
))
5234 emit_insn (gen_addsi3 (reg
, base
, offset
));
5238 return gen_rtx_PLUS (Pmode
, base
, offset
);
5245 /* Find a spare register to use during the prolog of a function. */
5248 thumb_find_work_register (unsigned long pushed_regs_mask
)
5252 /* Check the argument registers first as these are call-used. The
5253 register allocation order means that sometimes r3 might be used
5254 but earlier argument registers might not, so check them all. */
5255 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5256 if (!df_regs_ever_live_p (reg
))
5259 /* Before going on to check the call-saved registers we can try a couple
5260 more ways of deducing that r3 is available. The first is when we are
5261 pushing anonymous arguments onto the stack and we have less than 4
5262 registers worth of fixed arguments(*). In this case r3 will be part of
5263 the variable argument list and so we can be sure that it will be
5264 pushed right at the start of the function. Hence it will be available
5265 for the rest of the prologue.
5266 (*): ie crtl->args.pretend_args_size is greater than 0. */
5267 if (cfun
->machine
->uses_anonymous_args
5268 && crtl
->args
.pretend_args_size
> 0)
5269 return LAST_ARG_REGNUM
;
5271 /* The other case is when we have fixed arguments but less than 4 registers
5272 worth. In this case r3 might be used in the body of the function, but
5273 it is not being used to convey an argument into the function. In theory
5274 we could just check crtl->args.size to see how many bytes are
5275 being passed in argument registers, but it seems that it is unreliable.
5276 Sometimes it will have the value 0 when in fact arguments are being
5277 passed. (See testcase execute/20021111-1.c for an example). So we also
5278 check the args_info.nregs field as well. The problem with this field is
5279 that it makes no allowances for arguments that are passed to the
5280 function but which are not used. Hence we could miss an opportunity
5281 when a function has an unused argument in r3. But it is better to be
5282 safe than to be sorry. */
5283 if (! cfun
->machine
->uses_anonymous_args
5284 && crtl
->args
.size
>= 0
5285 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5286 && crtl
->args
.info
.nregs
< 4)
5287 return LAST_ARG_REGNUM
;
5289 /* Otherwise look for a call-saved register that is going to be pushed. */
5290 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5291 if (pushed_regs_mask
& (1 << reg
))
5296 /* Thumb-2 can use high regs. */
5297 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5298 if (pushed_regs_mask
& (1 << reg
))
5301 /* Something went wrong - thumb_compute_save_reg_mask()
5302 should have arranged for a suitable register to be pushed. */
5306 static GTY(()) int pic_labelno
;
5308 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5312 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5314 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5316 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5319 gcc_assert (flag_pic
);
5321 pic_reg
= cfun
->machine
->pic_reg
;
5322 if (TARGET_VXWORKS_RTP
)
5324 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5325 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5326 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5328 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5330 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5331 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5335 /* We use an UNSPEC rather than a LABEL_REF because this label
5336 never appears in the code stream. */
5338 labelno
= GEN_INT (pic_labelno
++);
5339 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5340 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5342 /* On the ARM the PC register contains 'dot + 8' at the time of the
5343 addition, on the Thumb it is 'dot + 4'. */
5344 pic_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5345 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5347 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5351 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5353 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
5355 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5357 else /* TARGET_THUMB1 */
5359 if (arm_pic_register
!= INVALID_REGNUM
5360 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5362 /* We will have pushed the pic register, so we should always be
5363 able to find a work register. */
5364 pic_tmp
= gen_rtx_REG (SImode
,
5365 thumb_find_work_register (saved_regs
));
5366 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5367 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5370 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
5371 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5375 /* Need to emit this whether or not we obey regdecls,
5376 since setjmp/longjmp can cause life info to screw up. */
5380 /* Generate code to load the address of a static var when flag_pic is set. */
5382 arm_pic_static_addr (rtx orig
, rtx reg
)
5384 rtx l1
, labelno
, offset_rtx
, insn
;
5386 gcc_assert (flag_pic
);
5388 /* We use an UNSPEC rather than a LABEL_REF because this label
5389 never appears in the code stream. */
5390 labelno
= GEN_INT (pic_labelno
++);
5391 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5392 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5394 /* On the ARM the PC register contains 'dot + 8' at the time of the
5395 addition, on the Thumb it is 'dot + 4'. */
5396 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5397 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5398 UNSPEC_SYMBOL_OFFSET
);
5399 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5403 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5405 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5407 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5409 else /* TARGET_THUMB1 */
5411 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5412 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5418 /* Return nonzero if X is valid as an ARM state addressing register. */
5420 arm_address_register_rtx_p (rtx x
, int strict_p
)
5424 if (GET_CODE (x
) != REG
)
5430 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5432 return (regno
<= LAST_ARM_REGNUM
5433 || regno
>= FIRST_PSEUDO_REGISTER
5434 || regno
== FRAME_POINTER_REGNUM
5435 || regno
== ARG_POINTER_REGNUM
);
5438 /* Return TRUE if this rtx is the difference of a symbol and a label,
5439 and will reduce to a PC-relative relocation in the object file.
5440 Expressions like this can be left alone when generating PIC, rather
5441 than forced through the GOT. */
5443 pcrel_constant_p (rtx x
)
5445 if (GET_CODE (x
) == MINUS
)
5446 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5451 /* Return true if X will surely end up in an index register after next
5454 will_be_in_index_register (const_rtx x
)
5456 /* arm.md: calculate_pic_address will split this into a register. */
5457 return GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_SYM
;
5460 /* Return nonzero if X is a valid ARM state address operand. */
5462 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5466 enum rtx_code code
= GET_CODE (x
);
5468 if (arm_address_register_rtx_p (x
, strict_p
))
5471 use_ldrd
= (TARGET_LDRD
5473 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5475 if (code
== POST_INC
|| code
== PRE_DEC
5476 || ((code
== PRE_INC
|| code
== POST_DEC
)
5477 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5478 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5480 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5481 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5482 && GET_CODE (XEXP (x
, 1)) == PLUS
5483 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5485 rtx addend
= XEXP (XEXP (x
, 1), 1);
5487 /* Don't allow ldrd post increment by register because it's hard
5488 to fixup invalid register choices. */
5490 && GET_CODE (x
) == POST_MODIFY
5491 && GET_CODE (addend
) == REG
)
5494 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5495 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5498 /* After reload constants split into minipools will have addresses
5499 from a LABEL_REF. */
5500 else if (reload_completed
5501 && (code
== LABEL_REF
5503 && GET_CODE (XEXP (x
, 0)) == PLUS
5504 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5505 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5508 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5511 else if (code
== PLUS
)
5513 rtx xop0
= XEXP (x
, 0);
5514 rtx xop1
= XEXP (x
, 1);
5516 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5517 && ((GET_CODE(xop1
) == CONST_INT
5518 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5519 || (!strict_p
&& will_be_in_index_register (xop1
))))
5520 || (arm_address_register_rtx_p (xop1
, strict_p
)
5521 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5525 /* Reload currently can't handle MINUS, so disable this for now */
5526 else if (GET_CODE (x
) == MINUS
)
5528 rtx xop0
= XEXP (x
, 0);
5529 rtx xop1
= XEXP (x
, 1);
5531 return (arm_address_register_rtx_p (xop0
, strict_p
)
5532 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5536 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5537 && code
== SYMBOL_REF
5538 && CONSTANT_POOL_ADDRESS_P (x
)
5540 && symbol_mentioned_p (get_pool_constant (x
))
5541 && ! pcrel_constant_p (get_pool_constant (x
))))
5547 /* Return nonzero if X is a valid Thumb-2 address operand. */
5549 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5552 enum rtx_code code
= GET_CODE (x
);
5554 if (arm_address_register_rtx_p (x
, strict_p
))
5557 use_ldrd
= (TARGET_LDRD
5559 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5561 if (code
== POST_INC
|| code
== PRE_DEC
5562 || ((code
== PRE_INC
|| code
== POST_DEC
)
5563 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5564 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5566 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5567 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5568 && GET_CODE (XEXP (x
, 1)) == PLUS
5569 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5571 /* Thumb-2 only has autoincrement by constant. */
5572 rtx addend
= XEXP (XEXP (x
, 1), 1);
5573 HOST_WIDE_INT offset
;
5575 if (GET_CODE (addend
) != CONST_INT
)
5578 offset
= INTVAL(addend
);
5579 if (GET_MODE_SIZE (mode
) <= 4)
5580 return (offset
> -256 && offset
< 256);
5582 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5583 && (offset
& 3) == 0);
5586 /* After reload constants split into minipools will have addresses
5587 from a LABEL_REF. */
5588 else if (reload_completed
5589 && (code
== LABEL_REF
5591 && GET_CODE (XEXP (x
, 0)) == PLUS
5592 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5593 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5596 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5599 else if (code
== PLUS
)
5601 rtx xop0
= XEXP (x
, 0);
5602 rtx xop1
= XEXP (x
, 1);
5604 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5605 && (thumb2_legitimate_index_p (mode
, xop1
, strict_p
)
5606 || (!strict_p
&& will_be_in_index_register (xop1
))))
5607 || (arm_address_register_rtx_p (xop1
, strict_p
)
5608 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5611 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5612 && code
== SYMBOL_REF
5613 && CONSTANT_POOL_ADDRESS_P (x
)
5615 && symbol_mentioned_p (get_pool_constant (x
))
5616 && ! pcrel_constant_p (get_pool_constant (x
))))
5622 /* Return nonzero if INDEX is valid for an address index operand in
5625 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5628 HOST_WIDE_INT range
;
5629 enum rtx_code code
= GET_CODE (index
);
5631 /* Standard coprocessor addressing modes. */
5632 if (TARGET_HARD_FLOAT
5633 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
5634 && (mode
== SFmode
|| mode
== DFmode
5635 || (TARGET_MAVERICK
&& mode
== DImode
)))
5636 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5637 && INTVAL (index
) > -1024
5638 && (INTVAL (index
) & 3) == 0);
5640 /* For quad modes, we restrict the constant offset to be slightly less
5641 than what the instruction format permits. We do this because for
5642 quad mode moves, we will actually decompose them into two separate
5643 double-mode reads or writes. INDEX must therefore be a valid
5644 (double-mode) offset and so should INDEX+8. */
5645 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5646 return (code
== CONST_INT
5647 && INTVAL (index
) < 1016
5648 && INTVAL (index
) > -1024
5649 && (INTVAL (index
) & 3) == 0);
5651 /* We have no such constraint on double mode offsets, so we permit the
5652 full range of the instruction format. */
5653 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5654 return (code
== CONST_INT
5655 && INTVAL (index
) < 1024
5656 && INTVAL (index
) > -1024
5657 && (INTVAL (index
) & 3) == 0);
5659 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5660 return (code
== CONST_INT
5661 && INTVAL (index
) < 1024
5662 && INTVAL (index
) > -1024
5663 && (INTVAL (index
) & 3) == 0);
5665 if (arm_address_register_rtx_p (index
, strict_p
)
5666 && (GET_MODE_SIZE (mode
) <= 4))
5669 if (mode
== DImode
|| mode
== DFmode
)
5671 if (code
== CONST_INT
)
5673 HOST_WIDE_INT val
= INTVAL (index
);
5676 return val
> -256 && val
< 256;
5678 return val
> -4096 && val
< 4092;
5681 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5684 if (GET_MODE_SIZE (mode
) <= 4
5688 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5692 rtx xiop0
= XEXP (index
, 0);
5693 rtx xiop1
= XEXP (index
, 1);
5695 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5696 && power_of_two_operand (xiop1
, SImode
))
5697 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5698 && power_of_two_operand (xiop0
, SImode
)));
5700 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5701 || code
== ASHIFT
|| code
== ROTATERT
)
5703 rtx op
= XEXP (index
, 1);
5705 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5706 && GET_CODE (op
) == CONST_INT
5708 && INTVAL (op
) <= 31);
5712 /* For ARM v4 we may be doing a sign-extend operation during the
5718 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5724 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5726 return (code
== CONST_INT
5727 && INTVAL (index
) < range
5728 && INTVAL (index
) > -range
);
5731 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5732 index operand. i.e. 1, 2, 4 or 8. */
5734 thumb2_index_mul_operand (rtx op
)
5738 if (GET_CODE(op
) != CONST_INT
)
5742 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5745 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5747 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5749 enum rtx_code code
= GET_CODE (index
);
5751 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5752 /* Standard coprocessor addressing modes. */
5753 if (TARGET_HARD_FLOAT
5754 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
5755 && (mode
== SFmode
|| mode
== DFmode
5756 || (TARGET_MAVERICK
&& mode
== DImode
)))
5757 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5758 /* Thumb-2 allows only > -256 index range for it's core register
5759 load/stores. Since we allow SF/DF in core registers, we have
5760 to use the intersection between -256~4096 (core) and -1024~1024
5762 && INTVAL (index
) > -256
5763 && (INTVAL (index
) & 3) == 0);
5765 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5767 /* For DImode assume values will usually live in core regs
5768 and only allow LDRD addressing modes. */
5769 if (!TARGET_LDRD
|| mode
!= DImode
)
5770 return (code
== CONST_INT
5771 && INTVAL (index
) < 1024
5772 && INTVAL (index
) > -1024
5773 && (INTVAL (index
) & 3) == 0);
5776 /* For quad modes, we restrict the constant offset to be slightly less
5777 than what the instruction format permits. We do this because for
5778 quad mode moves, we will actually decompose them into two separate
5779 double-mode reads or writes. INDEX must therefore be a valid
5780 (double-mode) offset and so should INDEX+8. */
5781 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
))
5782 return (code
== CONST_INT
5783 && INTVAL (index
) < 1016
5784 && INTVAL (index
) > -1024
5785 && (INTVAL (index
) & 3) == 0);
5787 /* We have no such constraint on double mode offsets, so we permit the
5788 full range of the instruction format. */
5789 if (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
))
5790 return (code
== CONST_INT
5791 && INTVAL (index
) < 1024
5792 && INTVAL (index
) > -1024
5793 && (INTVAL (index
) & 3) == 0);
5795 if (arm_address_register_rtx_p (index
, strict_p
)
5796 && (GET_MODE_SIZE (mode
) <= 4))
5799 if (mode
== DImode
|| mode
== DFmode
)
5801 if (code
== CONST_INT
)
5803 HOST_WIDE_INT val
= INTVAL (index
);
5804 /* ??? Can we assume ldrd for thumb2? */
5805 /* Thumb-2 ldrd only has reg+const addressing modes. */
5806 /* ldrd supports offsets of +-1020.
5807 However the ldr fallback does not. */
5808 return val
> -256 && val
< 256 && (val
& 3) == 0;
5816 rtx xiop0
= XEXP (index
, 0);
5817 rtx xiop1
= XEXP (index
, 1);
5819 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5820 && thumb2_index_mul_operand (xiop1
))
5821 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5822 && thumb2_index_mul_operand (xiop0
)));
5824 else if (code
== ASHIFT
)
5826 rtx op
= XEXP (index
, 1);
5828 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5829 && GET_CODE (op
) == CONST_INT
5831 && INTVAL (op
) <= 3);
5834 return (code
== CONST_INT
5835 && INTVAL (index
) < 4096
5836 && INTVAL (index
) > -256);
5839 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5841 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
5845 if (GET_CODE (x
) != REG
)
5851 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
5853 return (regno
<= LAST_LO_REGNUM
5854 || regno
> LAST_VIRTUAL_REGISTER
5855 || regno
== FRAME_POINTER_REGNUM
5856 || (GET_MODE_SIZE (mode
) >= 4
5857 && (regno
== STACK_POINTER_REGNUM
5858 || regno
>= FIRST_PSEUDO_REGISTER
5859 || x
== hard_frame_pointer_rtx
5860 || x
== arg_pointer_rtx
)));
5863 /* Return nonzero if x is a legitimate index register. This is the case
5864 for any base register that can access a QImode object. */
5866 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
5868 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
5871 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5873 The AP may be eliminated to either the SP or the FP, so we use the
5874 least common denominator, e.g. SImode, and offsets from 0 to 64.
5876 ??? Verify whether the above is the right approach.
5878 ??? Also, the FP may be eliminated to the SP, so perhaps that
5879 needs special handling also.
5881 ??? Look at how the mips16 port solves this problem. It probably uses
5882 better ways to solve some of these problems.
5884 Although it is not incorrect, we don't accept QImode and HImode
5885 addresses based on the frame pointer or arg pointer until the
5886 reload pass starts. This is so that eliminating such addresses
5887 into stack based ones won't produce impossible code. */
5889 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5891 /* ??? Not clear if this is right. Experiment. */
5892 if (GET_MODE_SIZE (mode
) < 4
5893 && !(reload_in_progress
|| reload_completed
)
5894 && (reg_mentioned_p (frame_pointer_rtx
, x
)
5895 || reg_mentioned_p (arg_pointer_rtx
, x
)
5896 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
5897 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
5898 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
5899 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
5902 /* Accept any base register. SP only in SImode or larger. */
5903 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
5906 /* This is PC relative data before arm_reorg runs. */
5907 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
5908 && GET_CODE (x
) == SYMBOL_REF
5909 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
5912 /* This is PC relative data after arm_reorg runs. */
5913 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
5915 && (GET_CODE (x
) == LABEL_REF
5916 || (GET_CODE (x
) == CONST
5917 && GET_CODE (XEXP (x
, 0)) == PLUS
5918 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5919 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5922 /* Post-inc indexing only supported for SImode and larger. */
5923 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
5924 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
5927 else if (GET_CODE (x
) == PLUS
)
5929 /* REG+REG address can be any two index registers. */
5930 /* We disallow FRAME+REG addressing since we know that FRAME
5931 will be replaced with STACK, and SP relative addressing only
5932 permits SP+OFFSET. */
5933 if (GET_MODE_SIZE (mode
) <= 4
5934 && XEXP (x
, 0) != frame_pointer_rtx
5935 && XEXP (x
, 1) != frame_pointer_rtx
5936 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5937 && (thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
)
5938 || (!strict_p
&& will_be_in_index_register (XEXP (x
, 1)))))
5941 /* REG+const has 5-7 bit offset for non-SP registers. */
5942 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5943 || XEXP (x
, 0) == arg_pointer_rtx
)
5944 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5945 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
5948 /* REG+const has 10-bit offset for SP, but only SImode and
5949 larger is supported. */
5950 /* ??? Should probably check for DI/DFmode overflow here
5951 just like GO_IF_LEGITIMATE_OFFSET does. */
5952 else if (GET_CODE (XEXP (x
, 0)) == REG
5953 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
5954 && GET_MODE_SIZE (mode
) >= 4
5955 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5956 && INTVAL (XEXP (x
, 1)) >= 0
5957 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
5958 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5961 else if (GET_CODE (XEXP (x
, 0)) == REG
5962 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
5963 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
5964 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
5965 && REGNO (XEXP (x
, 0))
5966 <= LAST_VIRTUAL_POINTER_REGISTER
))
5967 && GET_MODE_SIZE (mode
) >= 4
5968 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5969 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5973 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5974 && GET_MODE_SIZE (mode
) == 4
5975 && GET_CODE (x
) == SYMBOL_REF
5976 && CONSTANT_POOL_ADDRESS_P (x
)
5978 && symbol_mentioned_p (get_pool_constant (x
))
5979 && ! pcrel_constant_p (get_pool_constant (x
))))
5985 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
5986 instruction of mode MODE. */
5988 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
5990 switch (GET_MODE_SIZE (mode
))
5993 return val
>= 0 && val
< 32;
5996 return val
>= 0 && val
< 64 && (val
& 1) == 0;
6000 && (val
+ GET_MODE_SIZE (mode
)) <= 128
6006 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, bool strict_p
)
6009 return arm_legitimate_address_outer_p (mode
, x
, SET
, strict_p
);
6010 else if (TARGET_THUMB2
)
6011 return thumb2_legitimate_address_p (mode
, x
, strict_p
);
6012 else /* if (TARGET_THUMB1) */
6013 return thumb1_legitimate_address_p (mode
, x
, strict_p
);
6016 /* Build the SYMBOL_REF for __tls_get_addr. */
6018 static GTY(()) rtx tls_get_addr_libfunc
;
6021 get_tls_get_addr (void)
6023 if (!tls_get_addr_libfunc
)
6024 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
6025 return tls_get_addr_libfunc
;
6029 arm_load_tp (rtx target
)
6032 target
= gen_reg_rtx (SImode
);
6036 /* Can return in any reg. */
6037 emit_insn (gen_load_tp_hard (target
));
6041 /* Always returned in r0. Immediately copy the result into a pseudo,
6042 otherwise other uses of r0 (e.g. setting up function arguments) may
6043 clobber the value. */
6047 emit_insn (gen_load_tp_soft ());
6049 tmp
= gen_rtx_REG (SImode
, 0);
6050 emit_move_insn (target
, tmp
);
6056 load_tls_operand (rtx x
, rtx reg
)
6060 if (reg
== NULL_RTX
)
6061 reg
= gen_reg_rtx (SImode
);
6063 tmp
= gen_rtx_CONST (SImode
, x
);
6065 emit_move_insn (reg
, tmp
);
6071 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
6073 rtx insns
, label
, labelno
, sum
;
6075 gcc_assert (reloc
!= TLS_DESCSEQ
);
6078 labelno
= GEN_INT (pic_labelno
++);
6079 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6080 label
= gen_rtx_CONST (VOIDmode
, label
);
6082 sum
= gen_rtx_UNSPEC (Pmode
,
6083 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
6084 GEN_INT (TARGET_ARM
? 8 : 4)),
6086 reg
= load_tls_operand (sum
, reg
);
6089 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
6091 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6093 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
,
6094 LCT_PURE
, /* LCT_CONST? */
6095 Pmode
, 1, reg
, Pmode
);
6097 insns
= get_insns ();
6104 arm_tls_descseq_addr (rtx x
, rtx reg
)
6106 rtx labelno
= GEN_INT (pic_labelno
++);
6107 rtx label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6108 rtx sum
= gen_rtx_UNSPEC (Pmode
,
6109 gen_rtvec (4, x
, GEN_INT (TLS_DESCSEQ
),
6110 gen_rtx_CONST (VOIDmode
, label
),
6111 GEN_INT (!TARGET_ARM
)),
6113 rtx reg0
= load_tls_operand (sum
, gen_rtx_REG (SImode
, 0));
6115 emit_insn (gen_tlscall (x
, labelno
));
6117 reg
= gen_reg_rtx (SImode
);
6119 gcc_assert (REGNO (reg
) != 0);
6121 emit_move_insn (reg
, reg0
);
6127 legitimize_tls_address (rtx x
, rtx reg
)
6129 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
6130 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
6134 case TLS_MODEL_GLOBAL_DYNAMIC
:
6135 if (TARGET_GNU2_TLS
)
6137 reg
= arm_tls_descseq_addr (x
, reg
);
6139 tp
= arm_load_tp (NULL_RTX
);
6141 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6145 /* Original scheme */
6146 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
6147 dest
= gen_reg_rtx (Pmode
);
6148 emit_libcall_block (insns
, dest
, ret
, x
);
6152 case TLS_MODEL_LOCAL_DYNAMIC
:
6153 if (TARGET_GNU2_TLS
)
6155 reg
= arm_tls_descseq_addr (x
, reg
);
6157 tp
= arm_load_tp (NULL_RTX
);
6159 dest
= gen_rtx_PLUS (Pmode
, tp
, reg
);
6163 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
6165 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
6166 share the LDM result with other LD model accesses. */
6167 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
6169 dest
= gen_reg_rtx (Pmode
);
6170 emit_libcall_block (insns
, dest
, ret
, eqv
);
6172 /* Load the addend. */
6173 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
,
6174 GEN_INT (TLS_LDO32
)),
6176 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
6177 dest
= gen_rtx_PLUS (Pmode
, dest
, addend
);
6181 case TLS_MODEL_INITIAL_EXEC
:
6182 labelno
= GEN_INT (pic_labelno
++);
6183 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
6184 label
= gen_rtx_CONST (VOIDmode
, label
);
6185 sum
= gen_rtx_UNSPEC (Pmode
,
6186 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
6187 GEN_INT (TARGET_ARM
? 8 : 4)),
6189 reg
= load_tls_operand (sum
, reg
);
6192 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
6193 else if (TARGET_THUMB2
)
6194 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
6197 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
6198 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
6201 tp
= arm_load_tp (NULL_RTX
);
6203 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6205 case TLS_MODEL_LOCAL_EXEC
:
6206 tp
= arm_load_tp (NULL_RTX
);
6208 reg
= gen_rtx_UNSPEC (Pmode
,
6209 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
6211 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
6213 return gen_rtx_PLUS (Pmode
, tp
, reg
);
6220 /* Try machine-dependent ways of modifying an illegitimate address
6221 to be legitimate. If we find one, return the new, valid address. */
6223 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6227 /* TODO: legitimize_address for Thumb2. */
6230 return thumb_legitimize_address (x
, orig_x
, mode
);
6233 if (arm_tls_symbol_p (x
))
6234 return legitimize_tls_address (x
, NULL_RTX
);
6236 if (GET_CODE (x
) == PLUS
)
6238 rtx xop0
= XEXP (x
, 0);
6239 rtx xop1
= XEXP (x
, 1);
6241 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
6242 xop0
= force_reg (SImode
, xop0
);
6244 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
6245 xop1
= force_reg (SImode
, xop1
);
6247 if (ARM_BASE_REGISTER_RTX_P (xop0
)
6248 && GET_CODE (xop1
) == CONST_INT
)
6250 HOST_WIDE_INT n
, low_n
;
6254 /* VFP addressing modes actually allow greater offsets, but for
6255 now we just stick with the lowest common denominator. */
6257 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
6269 low_n
= ((mode
) == TImode
? 0
6270 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
6274 base_reg
= gen_reg_rtx (SImode
);
6275 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
6276 emit_move_insn (base_reg
, val
);
6277 x
= plus_constant (base_reg
, low_n
);
6279 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6280 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6283 /* XXX We don't allow MINUS any more -- see comment in
6284 arm_legitimate_address_outer_p (). */
6285 else if (GET_CODE (x
) == MINUS
)
6287 rtx xop0
= XEXP (x
, 0);
6288 rtx xop1
= XEXP (x
, 1);
6290 if (CONSTANT_P (xop0
))
6291 xop0
= force_reg (SImode
, xop0
);
6293 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
6294 xop1
= force_reg (SImode
, xop1
);
6296 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
6297 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
6300 /* Make sure to take full advantage of the pre-indexed addressing mode
6301 with absolute addresses which often allows for the base register to
6302 be factorized for multiple adjacent memory references, and it might
6303 even allows for the mini pool to be avoided entirely. */
6304 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
6307 HOST_WIDE_INT mask
, base
, index
;
6310 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
6311 use a 8-bit index. So let's use a 12-bit index for SImode only and
6312 hope that arm_gen_constant will enable ldrb to use more bits. */
6313 bits
= (mode
== SImode
) ? 12 : 8;
6314 mask
= (1 << bits
) - 1;
6315 base
= INTVAL (x
) & ~mask
;
6316 index
= INTVAL (x
) & mask
;
6317 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6319 /* It'll most probably be more efficient to generate the base
6320 with more bits set and use a negative index instead. */
6324 base_reg
= force_reg (SImode
, GEN_INT (base
));
6325 x
= plus_constant (base_reg
, index
);
6330 /* We need to find and carefully transform any SYMBOL and LABEL
6331 references; so go back to the original address expression. */
6332 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6334 if (new_x
!= orig_x
)
6342 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6343 to be legitimate. If we find one, return the new, valid address. */
6345 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6347 if (arm_tls_symbol_p (x
))
6348 return legitimize_tls_address (x
, NULL_RTX
);
6350 if (GET_CODE (x
) == PLUS
6351 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6352 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6353 || INTVAL (XEXP (x
, 1)) < 0))
6355 rtx xop0
= XEXP (x
, 0);
6356 rtx xop1
= XEXP (x
, 1);
6357 HOST_WIDE_INT offset
= INTVAL (xop1
);
6359 /* Try and fold the offset into a biasing of the base register and
6360 then offsetting that. Don't do this when optimizing for space
6361 since it can cause too many CSEs. */
6362 if (optimize_size
&& offset
>= 0
6363 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6365 HOST_WIDE_INT delta
;
6368 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6369 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6370 delta
= 31 * GET_MODE_SIZE (mode
);
6372 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6374 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
6376 x
= plus_constant (xop0
, delta
);
6378 else if (offset
< 0 && offset
> -256)
6379 /* Small negative offsets are best done with a subtract before the
6380 dereference, forcing these into a register normally takes two
6382 x
= force_operand (x
, NULL_RTX
);
6385 /* For the remaining cases, force the constant into a register. */
6386 xop1
= force_reg (SImode
, xop1
);
6387 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6390 else if (GET_CODE (x
) == PLUS
6391 && s_register_operand (XEXP (x
, 1), SImode
)
6392 && !s_register_operand (XEXP (x
, 0), SImode
))
6394 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6396 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6401 /* We need to find and carefully transform any SYMBOL and LABEL
6402 references; so go back to the original address expression. */
6403 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6405 if (new_x
!= orig_x
)
6413 arm_legitimize_reload_address (rtx
*p
,
6414 enum machine_mode mode
,
6415 int opnum
, int type
,
6416 int ind_levels ATTRIBUTE_UNUSED
)
6418 if (GET_CODE (*p
) == PLUS
6419 && GET_CODE (XEXP (*p
, 0)) == REG
6420 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p
, 0)))
6421 && GET_CODE (XEXP (*p
, 1)) == CONST_INT
)
6423 HOST_WIDE_INT val
= INTVAL (XEXP (*p
, 1));
6424 HOST_WIDE_INT low
, high
;
6426 /* Detect coprocessor load/stores. */
6427 bool coproc_p
= ((TARGET_HARD_FLOAT
6428 && (TARGET_VFP
|| TARGET_FPA
|| TARGET_MAVERICK
)
6429 && (mode
== SFmode
|| mode
== DFmode
6430 || (mode
== DImode
&& TARGET_MAVERICK
)))
6431 || (TARGET_REALLY_IWMMXT
6432 && VALID_IWMMXT_REG_MODE (mode
))
6434 && (VALID_NEON_DREG_MODE (mode
)
6435 || VALID_NEON_QREG_MODE (mode
))));
6437 /* For some conditions, bail out when lower two bits are unaligned. */
6438 if ((val
& 0x3) != 0
6439 /* Coprocessor load/store indexes are 8-bits + '00' appended. */
6441 /* For DI, and DF under soft-float: */
6442 || ((mode
== DImode
|| mode
== DFmode
)
6443 /* Without ldrd, we use stm/ldm, which does not
6444 fair well with unaligned bits. */
6446 /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */
6447 || TARGET_THUMB2
))))
6450 /* When breaking down a [reg+index] reload address into [(reg+high)+low],
6451 of which the (reg+high) gets turned into a reload add insn,
6452 we try to decompose the index into high/low values that can often
6453 also lead to better reload CSE.
6455 ldr r0, [r2, #4100] // Offset too large
6456 ldr r1, [r2, #4104] // Offset too large
6458 is best reloaded as:
6464 which post-reload CSE can simplify in most cases to eliminate the
6465 second add instruction:
6470 The idea here is that we want to split out the bits of the constant
6471 as a mask, rather than as subtracting the maximum offset that the
6472 respective type of load/store used can handle.
6474 When encountering negative offsets, we can still utilize it even if
6475 the overall offset is positive; sometimes this may lead to an immediate
6476 that can be constructed with fewer instructions.
6478 ldr r0, [r2, #0x3FFFFC]
6480 This is best reloaded as:
6481 add t1, r2, #0x400000
6484 The trick for spotting this for a load insn with N bits of offset
6485 (i.e. bits N-1:0) is to look at bit N; if it is set, then chose a
6486 negative offset that is going to make bit N and all the bits below
6487 it become zero in the remainder part.
6489 The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect
6490 to sign-magnitude addressing (i.e. separate +- bit, or 1's complement),
6491 used in most cases of ARM load/store instructions. */
6493 #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \
6494 (((VAL) & ((1 << (N)) - 1)) \
6495 ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \
6500 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 10);
6502 /* NEON quad-word load/stores are made of two double-word accesses,
6503 so the valid index range is reduced by 8. Treat as 9-bit range if
6505 if (TARGET_NEON
&& VALID_NEON_QREG_MODE (mode
) && low
>= 1016)
6506 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 9);
6508 else if (GET_MODE_SIZE (mode
) == 8)
6511 low
= (TARGET_THUMB2
6512 ? SIGN_MAG_LOW_ADDR_BITS (val
, 10)
6513 : SIGN_MAG_LOW_ADDR_BITS (val
, 8));
6515 /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib)
6516 to access doublewords. The supported load/store offsets are
6517 -8, -4, and 4, which we try to produce here. */
6518 low
= ((val
& 0xf) ^ 0x8) - 0x8;
6520 else if (GET_MODE_SIZE (mode
) < 8)
6522 /* NEON element load/stores do not have an offset. */
6523 if (TARGET_NEON_FP16
&& mode
== HFmode
)
6528 /* Thumb-2 has an asymmetrical index range of (-256,4096).
6529 Try the wider 12-bit range first, and re-try if the result
6531 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6533 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6537 if (mode
== HImode
|| mode
== HFmode
)
6540 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 8);
6543 /* The storehi/movhi_bytes fallbacks can use only
6544 [-4094,+4094] of the full ldrb/strb index range. */
6545 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6546 if (low
== 4095 || low
== -4095)
6551 low
= SIGN_MAG_LOW_ADDR_BITS (val
, 12);
6557 high
= ((((val
- low
) & (unsigned HOST_WIDE_INT
) 0xffffffff)
6558 ^ (unsigned HOST_WIDE_INT
) 0x80000000)
6559 - (unsigned HOST_WIDE_INT
) 0x80000000);
6560 /* Check for overflow or zero */
6561 if (low
== 0 || high
== 0 || (high
+ low
!= val
))
6564 /* Reload the high part into a base reg; leave the low part
6566 *p
= gen_rtx_PLUS (GET_MODE (*p
),
6567 gen_rtx_PLUS (GET_MODE (*p
), XEXP (*p
, 0),
6570 push_reload (XEXP (*p
, 0), NULL_RTX
, &XEXP (*p
, 0), NULL
,
6571 MODE_BASE_REG_CLASS (mode
), GET_MODE (*p
),
6572 VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6580 thumb_legitimize_reload_address (rtx
*x_p
,
6581 enum machine_mode mode
,
6582 int opnum
, int type
,
6583 int ind_levels ATTRIBUTE_UNUSED
)
6587 if (GET_CODE (x
) == PLUS
6588 && GET_MODE_SIZE (mode
) < 4
6589 && REG_P (XEXP (x
, 0))
6590 && XEXP (x
, 0) == stack_pointer_rtx
6591 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6592 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6597 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6598 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6602 /* If both registers are hi-regs, then it's better to reload the
6603 entire expression rather than each register individually. That
6604 only requires one reload register rather than two. */
6605 if (GET_CODE (x
) == PLUS
6606 && REG_P (XEXP (x
, 0))
6607 && REG_P (XEXP (x
, 1))
6608 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6609 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6614 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6615 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6622 /* Test for various thread-local symbols. */
6624 /* Return TRUE if X is a thread-local symbol. */
6627 arm_tls_symbol_p (rtx x
)
6629 if (! TARGET_HAVE_TLS
)
6632 if (GET_CODE (x
) != SYMBOL_REF
)
6635 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6638 /* Helper for arm_tls_referenced_p. */
6641 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6643 if (GET_CODE (*x
) == SYMBOL_REF
)
6644 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6646 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6647 TLS offsets, not real symbol references. */
6648 if (GET_CODE (*x
) == UNSPEC
6649 && XINT (*x
, 1) == UNSPEC_TLS
)
6655 /* Return TRUE if X contains any TLS symbol references. */
6658 arm_tls_referenced_p (rtx x
)
6660 if (! TARGET_HAVE_TLS
)
6663 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6666 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
6668 On the ARM, allow any integer (invalid ones are removed later by insn
6669 patterns), nice doubles and symbol_refs which refer to the function's
6672 When generating pic allow anything. */
6675 arm_legitimate_constant_p_1 (enum machine_mode mode
, rtx x
)
6677 /* At present, we have no support for Neon structure constants, so forbid
6678 them here. It might be possible to handle simple cases like 0 and -1
6680 if (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
))
6683 return flag_pic
|| !label_mentioned_p (x
);
6687 thumb_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6689 return (GET_CODE (x
) == CONST_INT
6690 || GET_CODE (x
) == CONST_DOUBLE
6691 || CONSTANT_ADDRESS_P (x
)
6696 arm_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6698 return (!arm_cannot_force_const_mem (mode
, x
)
6700 ? arm_legitimate_constant_p_1 (mode
, x
)
6701 : thumb_legitimate_constant_p (mode
, x
)));
6704 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6707 arm_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
6711 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6713 split_const (x
, &base
, &offset
);
6714 if (GET_CODE (base
) == SYMBOL_REF
6715 && !offset_within_block_p (base
, INTVAL (offset
)))
6718 return arm_tls_referenced_p (x
);
6721 #define REG_OR_SUBREG_REG(X) \
6722 (GET_CODE (X) == REG \
6723 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6725 #define REG_OR_SUBREG_RTX(X) \
6726 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6729 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6731 enum machine_mode mode
= GET_MODE (x
);
6745 return COSTS_N_INSNS (1);
6748 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6751 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
6758 return COSTS_N_INSNS (2) + cycles
;
6760 return COSTS_N_INSNS (1) + 16;
6763 return (COSTS_N_INSNS (1)
6764 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6765 + GET_CODE (SET_DEST (x
)) == MEM
));
6770 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6772 if (thumb_shiftable_const (INTVAL (x
)))
6773 return COSTS_N_INSNS (2);
6774 return COSTS_N_INSNS (3);
6776 else if ((outer
== PLUS
|| outer
== COMPARE
)
6777 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6779 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6780 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6781 return COSTS_N_INSNS (1);
6782 else if (outer
== AND
)
6785 /* This duplicates the tests in the andsi3 expander. */
6786 for (i
= 9; i
<= 31; i
++)
6787 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6788 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6789 return COSTS_N_INSNS (2);
6791 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6792 || outer
== LSHIFTRT
)
6794 return COSTS_N_INSNS (2);
6800 return COSTS_N_INSNS (3);
6818 /* XXX another guess. */
6819 /* Memory costs quite a lot for the first word, but subsequent words
6820 load at the equivalent of a single insn each. */
6821 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
6822 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
6827 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6833 total
= mode
== DImode
? COSTS_N_INSNS (1) : 0;
6834 total
+= thumb1_rtx_costs (XEXP (x
, 0), GET_CODE (XEXP (x
, 0)), code
);
6840 return total
+ COSTS_N_INSNS (1);
6842 /* Assume a two-shift sequence. Increase the cost slightly so
6843 we prefer actual shifts over an extend operation. */
6844 return total
+ 1 + COSTS_N_INSNS (2);
6852 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
6854 enum machine_mode mode
= GET_MODE (x
);
6855 enum rtx_code subcode
;
6857 enum rtx_code code
= GET_CODE (x
);
6863 /* Memory costs quite a lot for the first word, but subsequent words
6864 load at the equivalent of a single insn each. */
6865 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
6872 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
6873 *total
= COSTS_N_INSNS (2);
6874 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
6875 *total
= COSTS_N_INSNS (4);
6877 *total
= COSTS_N_INSNS (20);
6881 if (GET_CODE (XEXP (x
, 1)) == REG
)
6882 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
6883 else if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6884 *total
= rtx_cost (XEXP (x
, 1), code
, speed
);
6890 *total
+= COSTS_N_INSNS (4);
6895 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
6896 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6899 *total
+= COSTS_N_INSNS (3);
6903 *total
+= COSTS_N_INSNS (1);
6904 /* Increase the cost of complex shifts because they aren't any faster,
6905 and reduce dual issue opportunities. */
6906 if (arm_tune_cortex_a9
6907 && outer
!= SET
&& GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6915 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
6916 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6917 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6919 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6923 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6924 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6926 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6933 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6935 if (TARGET_HARD_FLOAT
6937 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6939 *total
= COSTS_N_INSNS (1);
6940 if (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
6941 && arm_const_double_rtx (XEXP (x
, 0)))
6943 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6947 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6948 && arm_const_double_rtx (XEXP (x
, 1)))
6950 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6956 *total
= COSTS_N_INSNS (20);
6960 *total
= COSTS_N_INSNS (1);
6961 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6962 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6964 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6968 subcode
= GET_CODE (XEXP (x
, 1));
6969 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6970 || subcode
== LSHIFTRT
6971 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6973 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6974 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6978 /* A shift as a part of RSB costs no more than RSB itself. */
6979 if (GET_CODE (XEXP (x
, 0)) == MULT
6980 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6982 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, speed
);
6983 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6988 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
6990 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6991 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6995 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
6996 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
6998 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6999 if (GET_CODE (XEXP (XEXP (x
, 1), 0)) == REG
7000 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
7001 *total
+= COSTS_N_INSNS (1);
7009 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
7010 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7011 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7013 *total
= COSTS_N_INSNS (1);
7014 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
7016 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7020 /* MLA: All arguments must be registers. We filter out
7021 multiplication by a power of two, so that we fall down into
7023 if (GET_CODE (XEXP (x
, 0)) == MULT
7024 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7026 /* The cost comes from the cost of the multiply. */
7030 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7032 if (TARGET_HARD_FLOAT
7034 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7036 *total
= COSTS_N_INSNS (1);
7037 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
7038 && arm_const_double_rtx (XEXP (x
, 1)))
7040 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7047 *total
= COSTS_N_INSNS (20);
7051 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
7052 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
7054 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, speed
);
7055 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
7056 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
7057 *total
+= COSTS_N_INSNS (1);
7063 case AND
: case XOR
: case IOR
:
7065 /* Normally the frame registers will be spilt into reg+const during
7066 reload, so it is a bad idea to combine them with other instructions,
7067 since then they might not be moved outside of loops. As a compromise
7068 we allow integration with ops that have a constant as their second
7070 if (REG_OR_SUBREG_REG (XEXP (x
, 0))
7071 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
7072 && GET_CODE (XEXP (x
, 1)) != CONST_INT
)
7073 *total
= COSTS_N_INSNS (1);
7077 *total
+= COSTS_N_INSNS (2);
7078 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
7079 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7081 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7088 *total
+= COSTS_N_INSNS (1);
7089 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
7090 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7092 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7095 subcode
= GET_CODE (XEXP (x
, 0));
7096 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7097 || subcode
== LSHIFTRT
7098 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7100 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7101 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7106 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7108 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7109 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7113 if (subcode
== UMIN
|| subcode
== UMAX
7114 || subcode
== SMIN
|| subcode
== SMAX
)
7116 *total
= COSTS_N_INSNS (3);
7123 /* This should have been handled by the CPU specific routines. */
7127 if (arm_arch3m
&& mode
== SImode
7128 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
7129 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
7130 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
7131 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
7132 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
7133 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
7135 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, speed
);
7138 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
7142 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7144 if (TARGET_HARD_FLOAT
7146 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7148 *total
= COSTS_N_INSNS (1);
7151 *total
= COSTS_N_INSNS (2);
7157 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
7158 if (mode
== SImode
&& code
== NOT
)
7160 subcode
= GET_CODE (XEXP (x
, 0));
7161 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7162 || subcode
== LSHIFTRT
7163 || subcode
== ROTATE
|| subcode
== ROTATERT
7165 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
7167 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7168 /* Register shifts cost an extra cycle. */
7169 if (GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
7170 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
7179 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7181 *total
= COSTS_N_INSNS (4);
7185 operand
= XEXP (x
, 0);
7187 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
7188 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
7189 && GET_CODE (XEXP (operand
, 0)) == REG
7190 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
7191 *total
+= COSTS_N_INSNS (1);
7192 *total
+= (rtx_cost (XEXP (x
, 1), code
, speed
)
7193 + rtx_cost (XEXP (x
, 2), code
, speed
));
7197 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7199 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
7205 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7206 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7208 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
7214 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
7215 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
7217 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
7237 /* SCC insns. In the case where the comparison has already been
7238 performed, then they cost 2 instructions. Otherwise they need
7239 an additional comparison before them. */
7240 *total
= COSTS_N_INSNS (2);
7241 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7248 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
7254 *total
+= COSTS_N_INSNS (1);
7255 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
7256 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
7258 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7262 subcode
= GET_CODE (XEXP (x
, 0));
7263 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
7264 || subcode
== LSHIFTRT
7265 || subcode
== ROTATE
|| subcode
== ROTATERT
)
7267 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7268 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7273 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7275 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7276 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
7286 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
7287 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
7288 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
7289 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
7293 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7295 if (TARGET_HARD_FLOAT
7297 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7299 *total
= COSTS_N_INSNS (1);
7302 *total
= COSTS_N_INSNS (20);
7305 *total
= COSTS_N_INSNS (1);
7307 *total
+= COSTS_N_INSNS (3);
7313 if (GET_MODE_CLASS (mode
) == MODE_INT
)
7315 rtx op
= XEXP (x
, 0);
7316 enum machine_mode opmode
= GET_MODE (op
);
7319 *total
+= COSTS_N_INSNS (1);
7321 if (opmode
!= SImode
)
7325 /* If !arm_arch4, we use one of the extendhisi2_mem
7326 or movhi_bytes patterns for HImode. For a QImode
7327 sign extension, we first zero-extend from memory
7328 and then perform a shift sequence. */
7329 if (!arm_arch4
&& (opmode
!= QImode
|| code
== SIGN_EXTEND
))
7330 *total
+= COSTS_N_INSNS (2);
7333 *total
+= COSTS_N_INSNS (1);
7335 /* We don't have the necessary insn, so we need to perform some
7337 else if (TARGET_ARM
&& code
== ZERO_EXTEND
&& mode
== QImode
)
7338 /* An and with constant 255. */
7339 *total
+= COSTS_N_INSNS (1);
7341 /* A shift sequence. Increase costs slightly to avoid
7342 combining two shifts into an extend operation. */
7343 *total
+= COSTS_N_INSNS (2) + 1;
7349 switch (GET_MODE (XEXP (x
, 0)))
7356 *total
= COSTS_N_INSNS (1);
7366 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
7370 if (const_ok_for_arm (INTVAL (x
))
7371 || const_ok_for_arm (~INTVAL (x
)))
7372 *total
= COSTS_N_INSNS (1);
7374 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
7375 INTVAL (x
), NULL_RTX
,
7382 *total
= COSTS_N_INSNS (3);
7386 *total
= COSTS_N_INSNS (1);
7390 *total
= COSTS_N_INSNS (1);
7391 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7395 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
7396 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7397 *total
= COSTS_N_INSNS (1);
7399 *total
= COSTS_N_INSNS (4);
7403 *total
= COSTS_N_INSNS (4);
7408 /* Estimates the size cost of thumb1 instructions.
7409 For now most of the code is copied from thumb1_rtx_costs. We need more
7410 fine grain tuning when we have more related test cases. */
7412 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
7414 enum machine_mode mode
= GET_MODE (x
);
7427 return COSTS_N_INSNS (1);
7430 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7432 /* Thumb1 mul instruction can't operate on const. We must Load it
7433 into a register first. */
7434 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
7435 return COSTS_N_INSNS (1) + const_size
;
7437 return COSTS_N_INSNS (1);
7440 return (COSTS_N_INSNS (1)
7441 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
7442 + GET_CODE (SET_DEST (x
)) == MEM
));
7447 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
7448 return COSTS_N_INSNS (1);
7449 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7450 if (INTVAL (x
) >= -255 && INTVAL (x
) <= -1)
7451 return COSTS_N_INSNS (2);
7452 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
7453 if (thumb_shiftable_const (INTVAL (x
)))
7454 return COSTS_N_INSNS (2);
7455 return COSTS_N_INSNS (3);
7457 else if ((outer
== PLUS
|| outer
== COMPARE
)
7458 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
7460 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
7461 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
7462 return COSTS_N_INSNS (1);
7463 else if (outer
== AND
)
7466 /* This duplicates the tests in the andsi3 expander. */
7467 for (i
= 9; i
<= 31; i
++)
7468 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
7469 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
7470 return COSTS_N_INSNS (2);
7472 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
7473 || outer
== LSHIFTRT
)
7475 return COSTS_N_INSNS (2);
7481 return COSTS_N_INSNS (3);
7499 /* XXX another guess. */
7500 /* Memory costs quite a lot for the first word, but subsequent words
7501 load at the equivalent of a single insn each. */
7502 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7503 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7508 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7513 /* XXX still guessing. */
7514 switch (GET_MODE (XEXP (x
, 0)))
7517 return (1 + (mode
== DImode
? 4 : 0)
7518 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7521 return (4 + (mode
== DImode
? 4 : 0)
7522 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7525 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7536 /* RTX costs when optimizing for size. */
7538 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7541 enum machine_mode mode
= GET_MODE (x
);
7544 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7548 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7552 /* A memory access costs 1 insn if the mode is small, or the address is
7553 a single register, otherwise it costs one insn per word. */
7554 if (REG_P (XEXP (x
, 0)))
7555 *total
= COSTS_N_INSNS (1);
7557 && GET_CODE (XEXP (x
, 0)) == PLUS
7558 && will_be_in_index_register (XEXP (XEXP (x
, 0), 1)))
7559 /* This will be split into two instructions.
7560 See arm.md:calculate_pic_address. */
7561 *total
= COSTS_N_INSNS (2);
7563 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7570 /* Needs a libcall, so it costs about this. */
7571 *total
= COSTS_N_INSNS (2);
7575 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
7577 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, false);
7585 if (mode
== DImode
&& GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7587 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, false);
7590 else if (mode
== SImode
)
7592 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, false);
7593 /* Slightly disparage register shifts, but not by much. */
7594 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
7595 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, false);
7599 /* Needs a libcall. */
7600 *total
= COSTS_N_INSNS (2);
7604 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7605 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7607 *total
= COSTS_N_INSNS (1);
7613 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7614 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7616 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7617 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7618 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7619 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7620 || subcode1
== ASHIFTRT
)
7622 /* It's just the cost of the two operands. */
7627 *total
= COSTS_N_INSNS (1);
7631 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7635 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7636 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7638 *total
= COSTS_N_INSNS (1);
7642 /* A shift as a part of ADD costs nothing. */
7643 if (GET_CODE (XEXP (x
, 0)) == MULT
7644 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7646 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7647 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, false);
7648 *total
+= rtx_cost (XEXP (x
, 1), code
, false);
7653 case AND
: case XOR
: case IOR
:
7656 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7658 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7659 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7660 || (code
== AND
&& subcode
== NOT
))
7662 /* It's just the cost of the two operands. */
7668 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7672 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7676 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7677 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7679 *total
= COSTS_N_INSNS (1);
7685 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7694 if (cc_register (XEXP (x
, 0), VOIDmode
))
7697 *total
= COSTS_N_INSNS (1);
7701 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7702 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7703 *total
= COSTS_N_INSNS (1);
7705 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
7710 return arm_rtx_costs_1 (x
, outer_code
, total
, 0);
7713 if (const_ok_for_arm (INTVAL (x
)))
7714 /* A multiplication by a constant requires another instruction
7715 to load the constant to a register. */
7716 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
7718 else if (const_ok_for_arm (~INTVAL (x
)))
7719 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
7720 else if (const_ok_for_arm (-INTVAL (x
)))
7722 if (outer_code
== COMPARE
|| outer_code
== PLUS
7723 || outer_code
== MINUS
)
7726 *total
= COSTS_N_INSNS (1);
7729 *total
= COSTS_N_INSNS (2);
7735 *total
= COSTS_N_INSNS (2);
7739 *total
= COSTS_N_INSNS (4);
7744 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7745 cost of these slightly. */
7746 *total
= COSTS_N_INSNS (1) + 1;
7750 if (mode
!= VOIDmode
)
7751 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7753 *total
= COSTS_N_INSNS (4); /* How knows? */
7758 /* RTX costs when optimizing for size. */
7760 arm_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
7764 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
7765 (enum rtx_code
) outer_code
, total
);
7767 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
7768 (enum rtx_code
) outer_code
,
7772 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7773 supported on any "slowmul" cores, so it can be ignored. */
7776 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7777 int *total
, bool speed
)
7779 enum machine_mode mode
= GET_MODE (x
);
7783 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7790 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
7793 *total
= COSTS_N_INSNS (20);
7797 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7799 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7800 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7801 int cost
, const_ok
= const_ok_for_arm (i
);
7802 int j
, booth_unit_size
;
7804 /* Tune as appropriate. */
7805 cost
= const_ok
? 4 : 8;
7806 booth_unit_size
= 2;
7807 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7809 i
>>= booth_unit_size
;
7813 *total
= COSTS_N_INSNS (cost
);
7814 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7818 *total
= COSTS_N_INSNS (20);
7822 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);;
7827 /* RTX cost for cores with a fast multiply unit (M variants). */
7830 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7831 int *total
, bool speed
)
7833 enum machine_mode mode
= GET_MODE (x
);
7837 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7841 /* ??? should thumb2 use different costs? */
7845 /* There is no point basing this on the tuning, since it is always the
7846 fast variant if it exists at all. */
7848 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7849 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7850 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7852 *total
= COSTS_N_INSNS(2);
7859 *total
= COSTS_N_INSNS (5);
7863 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7865 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7866 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7867 int cost
, const_ok
= const_ok_for_arm (i
);
7868 int j
, booth_unit_size
;
7870 /* Tune as appropriate. */
7871 cost
= const_ok
? 4 : 8;
7872 booth_unit_size
= 8;
7873 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7875 i
>>= booth_unit_size
;
7879 *total
= COSTS_N_INSNS(cost
);
7885 *total
= COSTS_N_INSNS (4);
7889 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7891 if (TARGET_HARD_FLOAT
7893 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7895 *total
= COSTS_N_INSNS (1);
7900 /* Requires a lib call */
7901 *total
= COSTS_N_INSNS (20);
7905 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7910 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7911 so it can be ignored. */
7914 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7915 int *total
, bool speed
)
7917 enum machine_mode mode
= GET_MODE (x
);
7921 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7928 if (GET_CODE (XEXP (x
, 0)) != MULT
)
7929 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7931 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7932 will stall until the multiplication is complete. */
7933 *total
= COSTS_N_INSNS (3);
7937 /* There is no point basing this on the tuning, since it is always the
7938 fast variant if it exists at all. */
7940 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7941 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7942 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7944 *total
= COSTS_N_INSNS (2);
7951 *total
= COSTS_N_INSNS (5);
7955 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7957 /* If operand 1 is a constant we can more accurately
7958 calculate the cost of the multiply. The multiplier can
7959 retire 15 bits on the first cycle and a further 12 on the
7960 second. We do, of course, have to load the constant into
7961 a register first. */
7962 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7963 /* There's a general overhead of one cycle. */
7965 unsigned HOST_WIDE_INT masked_const
;
7970 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
7972 masked_const
= i
& 0xffff8000;
7973 if (masked_const
!= 0)
7976 masked_const
= i
& 0xf8000000;
7977 if (masked_const
!= 0)
7980 *total
= COSTS_N_INSNS (cost
);
7986 *total
= COSTS_N_INSNS (3);
7990 /* Requires a lib call */
7991 *total
= COSTS_N_INSNS (20);
7995 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8000 /* RTX costs for 9e (and later) cores. */
8003 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
8004 int *total
, bool speed
)
8006 enum machine_mode mode
= GET_MODE (x
);
8013 *total
= COSTS_N_INSNS (3);
8017 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
8025 /* There is no point basing this on the tuning, since it is always the
8026 fast variant if it exists at all. */
8028 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
8029 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
8030 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
8032 *total
= COSTS_N_INSNS (2);
8039 *total
= COSTS_N_INSNS (5);
8045 *total
= COSTS_N_INSNS (2);
8049 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
8051 if (TARGET_HARD_FLOAT
8053 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
8055 *total
= COSTS_N_INSNS (1);
8060 *total
= COSTS_N_INSNS (20);
8064 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
8067 /* All address computations that can be done are free, but rtx cost returns
8068 the same for practically all of them. So we weight the different types
8069 of address here in the order (most pref first):
8070 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
8072 arm_arm_address_cost (rtx x
)
8074 enum rtx_code c
= GET_CODE (x
);
8076 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
8078 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
8083 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
8086 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
8096 arm_thumb_address_cost (rtx x
)
8098 enum rtx_code c
= GET_CODE (x
);
8103 && GET_CODE (XEXP (x
, 0)) == REG
8104 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
8111 arm_address_cost (rtx x
, bool speed ATTRIBUTE_UNUSED
)
8113 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
8116 /* Adjust cost hook for XScale. */
8118 xscale_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8120 /* Some true dependencies can have a higher cost depending
8121 on precisely how certain input operands are used. */
8122 if (REG_NOTE_KIND(link
) == 0
8123 && recog_memoized (insn
) >= 0
8124 && recog_memoized (dep
) >= 0)
8126 int shift_opnum
= get_attr_shift (insn
);
8127 enum attr_type attr_type
= get_attr_type (dep
);
8129 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
8130 operand for INSN. If we have a shifted input operand and the
8131 instruction we depend on is another ALU instruction, then we may
8132 have to account for an additional stall. */
8133 if (shift_opnum
!= 0
8134 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
8136 rtx shifted_operand
;
8139 /* Get the shifted operand. */
8140 extract_insn (insn
);
8141 shifted_operand
= recog_data
.operand
[shift_opnum
];
8143 /* Iterate over all the operands in DEP. If we write an operand
8144 that overlaps with SHIFTED_OPERAND, then we have increase the
8145 cost of this dependency. */
8147 preprocess_constraints ();
8148 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
8150 /* We can ignore strict inputs. */
8151 if (recog_data
.operand_type
[opno
] == OP_IN
)
8154 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
8166 /* Adjust cost hook for Cortex A9. */
8168 cortex_a9_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8170 switch (REG_NOTE_KIND (link
))
8177 case REG_DEP_OUTPUT
:
8178 if (recog_memoized (insn
) >= 0
8179 && recog_memoized (dep
) >= 0)
8181 if (GET_CODE (PATTERN (insn
)) == SET
)
8184 (GET_MODE (SET_DEST (PATTERN (insn
)))) == MODE_FLOAT
8186 (GET_MODE (SET_SRC (PATTERN (insn
)))) == MODE_FLOAT
)
8188 enum attr_type attr_type_insn
= get_attr_type (insn
);
8189 enum attr_type attr_type_dep
= get_attr_type (dep
);
8191 /* By default all dependencies of the form
8194 have an extra latency of 1 cycle because
8195 of the input and output dependency in this
8196 case. However this gets modeled as an true
8197 dependency and hence all these checks. */
8198 if (REG_P (SET_DEST (PATTERN (insn
)))
8199 && REG_P (SET_DEST (PATTERN (dep
)))
8200 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn
)),
8201 SET_DEST (PATTERN (dep
))))
8203 /* FMACS is a special case where the dependant
8204 instruction can be issued 3 cycles before
8205 the normal latency in case of an output
8207 if ((attr_type_insn
== TYPE_FMACS
8208 || attr_type_insn
== TYPE_FMACD
)
8209 && (attr_type_dep
== TYPE_FMACS
8210 || attr_type_dep
== TYPE_FMACD
))
8212 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8213 *cost
= insn_default_latency (dep
) - 3;
8215 *cost
= insn_default_latency (dep
);
8220 if (REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8221 *cost
= insn_default_latency (dep
) + 1;
8223 *cost
= insn_default_latency (dep
);
8239 /* Adjust cost hook for FA726TE. */
8241 fa726te_sched_adjust_cost (rtx insn
, rtx link
, rtx dep
, int * cost
)
8243 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
8244 have penalty of 3. */
8245 if (REG_NOTE_KIND (link
) == REG_DEP_TRUE
8246 && recog_memoized (insn
) >= 0
8247 && recog_memoized (dep
) >= 0
8248 && get_attr_conds (dep
) == CONDS_SET
)
8250 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8251 if (get_attr_conds (insn
) == CONDS_USE
8252 && get_attr_type (insn
) != TYPE_BRANCH
)
8258 if (GET_CODE (PATTERN (insn
)) == COND_EXEC
8259 || get_attr_conds (insn
) == CONDS_USE
)
8269 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8270 It corrects the value of COST based on the relationship between
8271 INSN and DEP through the dependence LINK. It returns the new
8272 value. There is a per-core adjust_cost hook to adjust scheduler costs
8273 and the per-core hook can choose to completely override the generic
8274 adjust_cost function. Only put bits of code into arm_adjust_cost that
8275 are common across all cores. */
8277 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
8281 /* When generating Thumb-1 code, we want to place flag-setting operations
8282 close to a conditional branch which depends on them, so that we can
8283 omit the comparison. */
8285 && REG_NOTE_KIND (link
) == 0
8286 && recog_memoized (insn
) == CODE_FOR_cbranchsi4_insn
8287 && recog_memoized (dep
) >= 0
8288 && get_attr_conds (dep
) == CONDS_SET
)
8291 if (current_tune
->sched_adjust_cost
!= NULL
)
8293 if (!current_tune
->sched_adjust_cost (insn
, link
, dep
, &cost
))
8297 /* XXX This is not strictly true for the FPA. */
8298 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
8299 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
8302 /* Call insns don't incur a stall, even if they follow a load. */
8303 if (REG_NOTE_KIND (link
) == 0
8304 && GET_CODE (insn
) == CALL_INSN
)
8307 if ((i_pat
= single_set (insn
)) != NULL
8308 && GET_CODE (SET_SRC (i_pat
)) == MEM
8309 && (d_pat
= single_set (dep
)) != NULL
8310 && GET_CODE (SET_DEST (d_pat
)) == MEM
)
8312 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
8313 /* This is a load after a store, there is no conflict if the load reads
8314 from a cached area. Assume that loads from the stack, and from the
8315 constant pool are cached, and that others will miss. This is a
8318 if ((GET_CODE (src_mem
) == SYMBOL_REF
8319 && CONSTANT_POOL_ADDRESS_P (src_mem
))
8320 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
8321 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
8322 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
8330 arm_default_branch_cost (bool speed_p
, bool predictable_p ATTRIBUTE_UNUSED
)
8333 return (TARGET_THUMB2
&& !speed_p
) ? 1 : 4;
8335 return (optimize
> 0) ? 2 : 0;
8339 arm_cortex_a5_branch_cost (bool speed_p
, bool predictable_p
)
8341 return speed_p
? 0 : arm_default_branch_cost (speed_p
, predictable_p
);
8344 static int fp_consts_inited
= 0;
8346 /* Only zero is valid for VFP. Other values are also valid for FPA. */
8347 static const char * const strings_fp
[8] =
8350 "4", "5", "0.5", "10"
8353 static REAL_VALUE_TYPE values_fp
[8];
8356 init_fp_table (void)
8362 fp_consts_inited
= 1;
8364 fp_consts_inited
= 8;
8366 for (i
= 0; i
< fp_consts_inited
; i
++)
8368 r
= REAL_VALUE_ATOF (strings_fp
[i
], DFmode
);
8373 /* Return TRUE if rtx X is a valid immediate FP constant. */
8375 arm_const_double_rtx (rtx x
)
8380 if (!fp_consts_inited
)
8383 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8384 if (REAL_VALUE_MINUS_ZERO (r
))
8387 for (i
= 0; i
< fp_consts_inited
; i
++)
8388 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
8394 /* Return TRUE if rtx X is a valid immediate FPA constant. */
8396 neg_const_double_rtx_ok_for_fpa (rtx x
)
8401 if (!fp_consts_inited
)
8404 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8405 r
= real_value_negate (&r
);
8406 if (REAL_VALUE_MINUS_ZERO (r
))
8409 for (i
= 0; i
< 8; i
++)
8410 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
8417 /* VFPv3 has a fairly wide range of representable immediates, formed from
8418 "quarter-precision" floating-point values. These can be evaluated using this
8419 formula (with ^ for exponentiation):
8423 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
8424 16 <= n <= 31 and 0 <= r <= 7.
8426 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
8428 - A (most-significant) is the sign bit.
8429 - BCD are the exponent (encoded as r XOR 3).
8430 - EFGH are the mantissa (encoded as n - 16).
8433 /* Return an integer index for a VFPv3 immediate operand X suitable for the
8434 fconst[sd] instruction, or -1 if X isn't suitable. */
8436 vfp3_const_double_index (rtx x
)
8438 REAL_VALUE_TYPE r
, m
;
8440 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
8441 unsigned HOST_WIDE_INT mask
;
8442 HOST_WIDE_INT m1
, m2
;
8443 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8445 if (!TARGET_VFP3
|| GET_CODE (x
) != CONST_DOUBLE
)
8448 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8450 /* We can't represent these things, so detect them first. */
8451 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
8454 /* Extract sign, exponent and mantissa. */
8455 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
8456 r
= real_value_abs (&r
);
8457 exponent
= REAL_EXP (&r
);
8458 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8459 highest (sign) bit, with a fixed binary point at bit point_pos.
8460 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
8461 bits for the mantissa, this may fail (low bits would be lost). */
8462 real_ldexp (&m
, &r
, point_pos
- exponent
);
8463 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8467 /* If there are bits set in the low part of the mantissa, we can't
8468 represent this value. */
8472 /* Now make it so that mantissa contains the most-significant bits, and move
8473 the point_pos to indicate that the least-significant bits have been
8475 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8478 /* We can permit four significant bits of mantissa only, plus a high bit
8479 which is always 1. */
8480 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8481 if ((mantissa
& mask
) != 0)
8484 /* Now we know the mantissa is in range, chop off the unneeded bits. */
8485 mantissa
>>= point_pos
- 5;
8487 /* The mantissa may be zero. Disallow that case. (It's possible to load the
8488 floating-point immediate zero with Neon using an integer-zero load, but
8489 that case is handled elsewhere.) */
8493 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
8495 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
8496 normalized significands are in the range [1, 2). (Our mantissa is shifted
8497 left 4 places at this point relative to normalized IEEE754 values). GCC
8498 internally uses [0.5, 1) (see real.c), so the exponent returned from
8499 REAL_EXP must be altered. */
8500 exponent
= 5 - exponent
;
8502 if (exponent
< 0 || exponent
> 7)
8505 /* Sign, mantissa and exponent are now in the correct form to plug into the
8506 formula described in the comment above. */
8507 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
8510 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
8512 vfp3_const_double_rtx (rtx x
)
8517 return vfp3_const_double_index (x
) != -1;
8520 /* Recognize immediates which can be used in various Neon instructions. Legal
8521 immediates are described by the following table (for VMVN variants, the
8522 bitwise inverse of the constant shown is recognized. In either case, VMOV
8523 is output and the correct instruction to use for a given constant is chosen
8524 by the assembler). The constant shown is replicated across all elements of
8525 the destination vector.
8527 insn elems variant constant (binary)
8528 ---- ----- ------- -----------------
8529 vmov i32 0 00000000 00000000 00000000 abcdefgh
8530 vmov i32 1 00000000 00000000 abcdefgh 00000000
8531 vmov i32 2 00000000 abcdefgh 00000000 00000000
8532 vmov i32 3 abcdefgh 00000000 00000000 00000000
8533 vmov i16 4 00000000 abcdefgh
8534 vmov i16 5 abcdefgh 00000000
8535 vmvn i32 6 00000000 00000000 00000000 abcdefgh
8536 vmvn i32 7 00000000 00000000 abcdefgh 00000000
8537 vmvn i32 8 00000000 abcdefgh 00000000 00000000
8538 vmvn i32 9 abcdefgh 00000000 00000000 00000000
8539 vmvn i16 10 00000000 abcdefgh
8540 vmvn i16 11 abcdefgh 00000000
8541 vmov i32 12 00000000 00000000 abcdefgh 11111111
8542 vmvn i32 13 00000000 00000000 abcdefgh 11111111
8543 vmov i32 14 00000000 abcdefgh 11111111 11111111
8544 vmvn i32 15 00000000 abcdefgh 11111111 11111111
8546 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
8547 eeeeeeee ffffffff gggggggg hhhhhhhh
8548 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
8550 For case 18, B = !b. Representable values are exactly those accepted by
8551 vfp3_const_double_index, but are output as floating-point numbers rather
8554 Variants 0-5 (inclusive) may also be used as immediates for the second
8555 operand of VORR/VBIC instructions.
8557 The INVERSE argument causes the bitwise inverse of the given operand to be
8558 recognized instead (used for recognizing legal immediates for the VAND/VORN
8559 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
8560 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
8561 output, rather than the real insns vbic/vorr).
8563 INVERSE makes no difference to the recognition of float vectors.
8565 The return value is the variant of immediate as shown in the above table, or
8566 -1 if the given value doesn't match any of the listed patterns.
8569 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
8570 rtx
*modconst
, int *elementwidth
)
8572 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
8574 for (i = 0; i < idx; i += (STRIDE)) \
8579 immtype = (CLASS); \
8580 elsize = (ELSIZE); \
8584 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8585 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8586 unsigned char bytes
[16];
8587 int immtype
= -1, matches
;
8588 unsigned int invmask
= inverse
? 0xff : 0;
8590 /* Vectors of float constants. */
8591 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8593 rtx el0
= CONST_VECTOR_ELT (op
, 0);
8596 if (!vfp3_const_double_rtx (el0
))
8599 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
8601 for (i
= 1; i
< n_elts
; i
++)
8603 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8606 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8608 if (!REAL_VALUES_EQUAL (r0
, re
))
8613 *modconst
= CONST_VECTOR_ELT (op
, 0);
8621 /* Splat vector constant out into a byte vector. */
8622 for (i
= 0; i
< n_elts
; i
++)
8624 rtx el
= CONST_VECTOR_ELT (op
, i
);
8625 unsigned HOST_WIDE_INT elpart
;
8626 unsigned int part
, parts
;
8628 if (GET_CODE (el
) == CONST_INT
)
8630 elpart
= INTVAL (el
);
8633 else if (GET_CODE (el
) == CONST_DOUBLE
)
8635 elpart
= CONST_DOUBLE_LOW (el
);
8641 for (part
= 0; part
< parts
; part
++)
8644 for (byte
= 0; byte
< innersize
; byte
++)
8646 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8647 elpart
>>= BITS_PER_UNIT
;
8649 if (GET_CODE (el
) == CONST_DOUBLE
)
8650 elpart
= CONST_DOUBLE_HIGH (el
);
8655 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8659 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8660 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8662 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8663 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8665 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8666 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8668 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8669 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
8671 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
8673 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
8675 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8676 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8678 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8679 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8681 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8682 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8684 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8685 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
8687 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
8689 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
8691 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8692 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8694 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8695 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8697 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8698 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8700 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8701 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8703 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
8705 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8706 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
8714 *elementwidth
= elsize
;
8718 unsigned HOST_WIDE_INT imm
= 0;
8720 /* Un-invert bytes of recognized vector, if necessary. */
8722 for (i
= 0; i
< idx
; i
++)
8723 bytes
[i
] ^= invmask
;
8727 /* FIXME: Broken on 32-bit H_W_I hosts. */
8728 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8730 for (i
= 0; i
< 8; i
++)
8731 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8732 << (i
* BITS_PER_UNIT
);
8734 *modconst
= GEN_INT (imm
);
8738 unsigned HOST_WIDE_INT imm
= 0;
8740 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8741 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8743 *modconst
= GEN_INT (imm
);
8751 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8752 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8753 float elements), and a modified constant (whatever should be output for a
8754 VMOV) in *MODCONST. */
/* NOTE(review): mangled extraction -- braces, the tmpconst/tmpwidth locals,
   the failure return and (presumably) NULL-pointer guards on the out
   parameters are elided here; confirm against the unmangled arm.c.  */
8757 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
8758 rtx
*modconst
, int *elementwidth
)
/* Delegate to neon_valid_immediate with INVERSE == 0; it yields the variant
   index from the immediate table documented above it, or -1 on failure.  */
8762 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
/* On success, report the canonicalized constant and per-element width back
   to the caller.  */
8768 *modconst
= tmpconst
;
8771 *elementwidth
= tmpwidth
;
8776 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8777 the immediate is valid, write a constant suitable for using as an operand
8778 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8779 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
/* NOTE(review): mangled extraction -- braces, locals, the failure return and
   (presumably) NULL guards on the out parameters are elided here.  */
8782 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
8783 rtx
*modconst
, int *elementwidth
)
8787 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
/* Per the table above neon_valid_immediate, only variants 0-5 (inclusive)
   may be used as the second operand of VORR/VBIC; reject everything else.  */
8789 if (retval
< 0 || retval
> 5)
8793 *modconst
= tmpconst
;
8796 *elementwidth
= tmpwidth
;
8801 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
8802 the immediate is valid, write a constant suitable for using as an operand
8803 to VSHR/VSHL to *MODCONST and the corresponding element width to
8804 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
8805 because they have different limitations. */
/* NOTE(review): mangled extraction -- the ISLEFTSHIFT parameter line, braces
   and several statements (including the returns) are elided here.  */
8808 neon_immediate_valid_for_shift (rtx op
, enum machine_mode mode
,
8809 rtx
*modconst
, int *elementwidth
,
8812 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8813 unsigned int n_elts
= CONST_VECTOR_NUNITS (op
), i
;
8814 unsigned HOST_WIDE_INT last_elt
= 0;
8815 unsigned HOST_WIDE_INT maxshift
;
8817 /* Split vector constant out into a byte vector. */
8818 for (i
= 0; i
< n_elts
; i
++)
8820 rtx el
= CONST_VECTOR_ELT (op
, i
);
8821 unsigned HOST_WIDE_INT elpart
;
8823 if (GET_CODE (el
) == CONST_INT
)
8824 elpart
= INTVAL (el
);
8825 else if (GET_CODE (el
) == CONST_DOUBLE
)
/* Every element must encode the same shift count; a mismatch with the
   previously seen element rejects the whole vector.  */
8830 if (i
!= 0 && elpart
!= last_elt
)
8836 /* Shift less than element size. */
8837 maxshift
= innersize
* 8;
8841 /* Left shift immediate value can be from 0 to <size>-1. */
8842 if (last_elt
>= maxshift
)
8847 /* Right shift immediate value can be from 1 to <size>. */
8848 if (last_elt
== 0 || last_elt
> maxshift
)
/* On success report the element width in bits and use element 0 as the
   canonical constant (all elements were verified equal above).  */
8853 *elementwidth
= innersize
* 8;
8856 *modconst
= CONST_VECTOR_ELT (op
, 0);
8861 /* Return a string suitable for output of Neon immediate logic operation
/* NOTE(review): mangled extraction -- part of this header comment, braces,
   the quad/double-word branch condition and the return statement are
   elided here.  */
8865 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
8866 int inverse
, int quad
)
8868 int width
, is_valid
;
/* Static buffer for the assembled template: the returned string is only
   valid until the next call -- not reentrant, fine for asm output.  */
8869 static char templ
[40];
/* OP2 is passed both as the input constant (*op2) and as the destination
   for the canonicalized constant that should actually be printed.  */
8871 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
/* Callers must only hand us immediates already validated as operands for
   this logic op.  */
8873 gcc_assert (is_valid
!= 0);
/* %q0 = quad (128-bit) destination, %P0 = double (64-bit) destination;
   the selecting test on QUAD is elided by the extraction.  */
8876 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
8878 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
8883 /* Return a string suitable for output of Neon immediate shift operation
8884 (VSHR or VSHL) MNEM. */
/* NOTE(review): mangled extraction -- the ISLEFTSHIFT parameter line, braces,
   the quad/double branch condition and the return are elided here.  */
8887 neon_output_shift_immediate (const char *mnem
, char sign
, rtx
*op2
,
8888 enum machine_mode mode
, int quad
,
8891 int width
, is_valid
;
/* Static buffer: returned template only valid until the next call.  */
8892 static char templ
[40];
/* *OP2 is validated and rewritten in place with the canonical constant.  */
8894 is_valid
= neon_immediate_valid_for_shift (*op2
, mode
, op2
, &width
, isleftshift
);
8895 gcc_assert (is_valid
!= 0);
/* SIGN selects the type letter (e.g. s/u) in "<mnem>.<sign><width>";
   %q = quad registers, %P = double registers.  */
8898 sprintf (templ
, "%s.%c%d\t%%q0, %%q1, %%2", mnem
, sign
, width
);
8900 sprintf (templ
, "%s.%c%d\t%%P0, %%P1, %%2", mnem
, sign
, width
);
8905 /* Output a sequence of pairwise operations to implement a reduction.
8906 NOTE: We do "too much work" here, because pairwise operations work on two
8907 registers-worth of operands in one go. Unfortunately we can't exploit those
8908 extra calculations to do the full operation in fewer steps, I don't think.
8909 Although all vector elements of the result but the first are ignored, we
8910 actually calculate the same result in each of the elements. An alternative
8911 such as initially loading a vector with zero to use as each of the second
8912 operands would use up an additional register and take an extra instruction,
8913 for no particular gain. */
/* NOTE(review): mangled extraction -- the declaration and per-iteration
   update of the running operand (tmpsum, presumably seeded from OP1) are
   elided here; confirm against the unmangled arm.c.  */
8916 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
8917 rtx (*reduc
) (rtx
, rtx
, rtx
))
8919 enum machine_mode inner
= GET_MODE_INNER (mode
);
8920 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
/* Halve the active element count each step: log2(parts) pairwise ops.  */
8923 for (i
= parts
/ 2; i
>= 1; i
/= 2)
/* The final step writes straight into OP0; intermediate steps go through
   fresh pseudos.  */
8925 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
8926 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
8931 /* If VALS is a vector constant that can be loaded into a register
8932 using VDUP, generate instructions to do so and return an RTX to
8933 assign to the register. Otherwise return NULL_RTX. */
/* NOTE(review): mangled extraction -- braces, the loop locals and the
   NULL_RTX failure returns are elided here.  */
8936 neon_vdup_constant (rtx vals
)
8938 enum machine_mode mode
= GET_MODE (vals
);
8939 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8940 int n_elts
= GET_MODE_NUNITS (mode
);
8941 bool all_same
= true;
/* VDUP replicates a core register; elements wider than 4 bytes cannot come
   from a single ARM register, so reject them (and non-CONST_VECTORs).  */
8945 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
/* Verify every element equals element 0 -- VDUP can only splat.  */
8948 for (i
= 0; i
< n_elts
; ++i
)
8950 x
= XVECEXP (vals
, 0, i
);
8951 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8956 /* The elements are not all the same. We could handle repeating
8957 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
8958 {0, C, 0, C, 0, C, 0, C} which can be loaded using
8962 /* We can load this constant by using VDUP and a constant in a
8963 single ARM register. This will be cheaper than a vector
8966 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8967 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8970 /* Generate code to load VALS, which is a PARALLEL containing only
8971 constants (for vec_init) or CONST_VECTOR, efficiently into a
8972 register. Returns an RTX to copy into the register, or NULL_RTX
8973 for a PARALLEL that can not be converted into a CONST_VECTOR. */
/* NOTE(review): mangled extraction -- braces, the n_const counter, the
   target local and the actual return statements of each strategy branch
   are elided here.  */
8976 neon_make_constant (rtx vals
)
8978 enum machine_mode mode
= GET_MODE (vals
);
8980 rtx const_vec
= NULL_RTX
;
8981 int n_elts
= GET_MODE_NUNITS (mode
);
8985 if (GET_CODE (vals
) == CONST_VECTOR
)
8987 else if (GET_CODE (vals
) == PARALLEL
)
8989 /* A CONST_VECTOR must contain only CONST_INTs and
8990 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8991 Only store valid constants in a CONST_VECTOR. */
8992 for (i
= 0; i
< n_elts
; ++i
)
8994 rtx x
= XVECEXP (vals
, 0, i
);
8995 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
/* Only when every element was a usable constant can the PARALLEL be
   converted to a CONST_VECTOR.  */
8998 if (n_const
== n_elts
)
8999 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
/* Strategy selection, cheapest first: VMOV immediate, then VDUP from a
   core register, then a constant-pool load.  */
9004 if (const_vec
!= NULL
9005 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
9006 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
9008 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
9009 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
9010 pipeline cycle; creating the constant takes one or two ARM
9013 else if (const_vec
!= NULL_RTX
)
9014 /* Load from constant pool. On Cortex-A8 this takes two cycles
9015 (for either double or quad vectors). We can not take advantage
9016 of single-cycle VLD1 because we need a PC-relative addressing
9020 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9021 We can not construct an initializer. */
9025 /* Initialize vector TARGET to VALS. */
9028 neon_expand_vector_init (rtx target
, rtx vals
)
9030 enum machine_mode mode
= GET_MODE (target
);
9031 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
9032 int n_elts
= GET_MODE_NUNITS (mode
);
9033 int n_var
= 0, one_var
= -1;
9034 bool all_same
= true;
9038 for (i
= 0; i
< n_elts
; ++i
)
9040 x
= XVECEXP (vals
, 0, i
);
9041 if (!CONSTANT_P (x
))
9042 ++n_var
, one_var
= i
;
9044 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
9050 rtx constant
= neon_make_constant (vals
);
9051 if (constant
!= NULL_RTX
)
9053 emit_move_insn (target
, constant
);
9058 /* Splat a single non-constant element if we can. */
9059 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
9061 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
9062 emit_insn (gen_rtx_SET (VOIDmode
, target
,
9063 gen_rtx_VEC_DUPLICATE (mode
, x
)));
9067 /* One field is non-constant. Load constant then overwrite varying
9068 field. This is more efficient than using the stack. */
9071 rtx copy
= copy_rtx (vals
);
9072 rtx index
= GEN_INT (one_var
);
9074 /* Load constant part of vector, substitute neighboring value for
9076 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
9077 neon_expand_vector_init (target
, copy
);
9079 /* Insert variable. */
9080 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
9084 emit_insn (gen_neon_vset_lanev8qi (target
, x
, target
, index
));
9087 emit_insn (gen_neon_vset_lanev16qi (target
, x
, target
, index
));
9090 emit_insn (gen_neon_vset_lanev4hi (target
, x
, target
, index
));
9093 emit_insn (gen_neon_vset_lanev8hi (target
, x
, target
, index
));
9096 emit_insn (gen_neon_vset_lanev2si (target
, x
, target
, index
));
9099 emit_insn (gen_neon_vset_lanev4si (target
, x
, target
, index
));
9102 emit_insn (gen_neon_vset_lanev2sf (target
, x
, target
, index
));
9105 emit_insn (gen_neon_vset_lanev4sf (target
, x
, target
, index
));
9108 emit_insn (gen_neon_vset_lanev2di (target
, x
, target
, index
));
9116 /* Construct the vector in memory one field at a time
9117 and load the whole vector. */
9118 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), 0);
9119 for (i
= 0; i
< n_elts
; i
++)
9120 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
9121 i
* GET_MODE_SIZE (inner_mode
)),
9122 XVECEXP (vals
, 0, i
));
9123 emit_move_insn (target
, mem
);
9126 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
9127 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
9128 reported source locations are bogus. */
/* NOTE(review): mangled extraction -- the ERR parameter line, the lane
   declaration, braces and the error () call are elided here.  */
9131 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
/* Only compile-time integer constants can be range-checked here.  */
9136 gcc_assert (GET_CODE (operand
) == CONST_INT
);
9138 lane
= INTVAL (operand
);
/* LOW is inclusive, HIGH exclusive, matching the header comment.  */
9140 if (lane
< low
|| lane
>= high
)
9144 /* Bounds-check lanes. */
/* Thin wrapper over bounds_check supplying the lane-specific message.  */
9147 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
9149 bounds_check (operand
, low
, high
, "lane out of range");
9152 /* Bounds-check constants. */
/* Thin wrapper over bounds_check supplying the constant-specific message.  */
9155 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
9157 bounds_check (operand
, low
, high
, "constant out of range");
/* Return the element width of MODE in bits.
   NOTE(review): mangled extraction -- the condition choosing between the two
   returns (original line ~9163; presumably a scalar-mode special case) is
   elided here; confirm against the unmangled arm.c.  */
9161 neon_element_bits (enum machine_mode mode
)
9164 return GET_MODE_BITSIZE (mode
);
/* Vector case: width of one inner element.  */
9166 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
9170 /* Predicates for `match_operand' and `match_operator'. */
9172 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
/* NOTE(review): mangled extraction -- braces, the ind local (taken from
   XEXP (op, 0)) and the return statements are elided here.  */
9174 cirrus_memory_offset (rtx op
)
9176 /* Reject eliminable registers. */
/* Before register elimination completes, addresses based on these registers
   may still be rewritten, so presumably they cannot be trusted here --
   confirm rationale against the unmangled arm.c.  */
9177 if (! (reload_in_progress
|| reload_completed
)
9178 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9179 || reg_mentioned_p (arg_pointer_rtx
, op
)
9180 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9181 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9182 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9183 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9186 if (GET_CODE (op
) == MEM
)
9192 /* Match: (mem (reg)). */
9193 if (GET_CODE (ind
) == REG
)
/* Match: (mem (plus (reg) (const_int))) with a valid base register.  */
9199 if (GET_CODE (ind
) == PLUS
9200 && GET_CODE (XEXP (ind
, 0)) == REG
9201 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9202 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
)
9209 /* Return TRUE if OP is a valid coprocessor memory address pattern.
9210 WB is true if full writeback address modes are allowed and is false
9211 if limited writeback address modes (POST_INC and PRE_DEC) are
9215 arm_coproc_mem_operand (rtx op
, bool wb
)
9219 /* Reject eliminable registers. */
9220 if (! (reload_in_progress
|| reload_completed
)
9221 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9222 || reg_mentioned_p (arg_pointer_rtx
, op
)
9223 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9224 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9225 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9226 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9229 /* Constants are converted into offsets from labels. */
9230 if (GET_CODE (op
) != MEM
)
9235 if (reload_completed
9236 && (GET_CODE (ind
) == LABEL_REF
9237 || (GET_CODE (ind
) == CONST
9238 && GET_CODE (XEXP (ind
, 0)) == PLUS
9239 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9240 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
9243 /* Match: (mem (reg)). */
9244 if (GET_CODE (ind
) == REG
)
9245 return arm_address_register_rtx_p (ind
, 0);
9247 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
9248 acceptable in any case (subject to verification by
9249 arm_address_register_rtx_p). We need WB to be true to accept
9250 PRE_INC and POST_DEC. */
9251 if (GET_CODE (ind
) == POST_INC
9252 || GET_CODE (ind
) == PRE_DEC
9254 && (GET_CODE (ind
) == PRE_INC
9255 || GET_CODE (ind
) == POST_DEC
)))
9256 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9259 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
9260 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
9261 && GET_CODE (XEXP (ind
, 1)) == PLUS
9262 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
9263 ind
= XEXP (ind
, 1);
9268 if (GET_CODE (ind
) == PLUS
9269 && GET_CODE (XEXP (ind
, 0)) == REG
9270 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9271 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
9272 && INTVAL (XEXP (ind
, 1)) > -1024
9273 && INTVAL (XEXP (ind
, 1)) < 1024
9274 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9280 /* Return TRUE if OP is a memory operand which we can load or store a vector
9281 to/from. TYPE is one of the following values:
9282 0 - Vector load/stor (vldr)
9283 1 - Core registers (ldm)
9284 2 - Element/structure loads (vld1)
9287 neon_vector_mem_operand (rtx op
, int type
)
9291 /* Reject eliminable registers. */
9292 if (! (reload_in_progress
|| reload_completed
)
9293 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9294 || reg_mentioned_p (arg_pointer_rtx
, op
)
9295 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9296 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9297 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9298 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9301 /* Constants are converted into offsets from labels. */
9302 if (GET_CODE (op
) != MEM
)
9307 if (reload_completed
9308 && (GET_CODE (ind
) == LABEL_REF
9309 || (GET_CODE (ind
) == CONST
9310 && GET_CODE (XEXP (ind
, 0)) == PLUS
9311 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9312 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
9315 /* Match: (mem (reg)). */
9316 if (GET_CODE (ind
) == REG
)
9317 return arm_address_register_rtx_p (ind
, 0);
9319 /* Allow post-increment with Neon registers. */
9320 if ((type
!= 1 && GET_CODE (ind
) == POST_INC
)
9321 || (type
== 0 && GET_CODE (ind
) == PRE_DEC
))
9322 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9324 /* FIXME: vld1 allows register post-modify. */
9330 && GET_CODE (ind
) == PLUS
9331 && GET_CODE (XEXP (ind
, 0)) == REG
9332 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
9333 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
9334 && INTVAL (XEXP (ind
, 1)) > -1024
9335 && INTVAL (XEXP (ind
, 1)) < 1016
9336 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
9342 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
/* NOTE(review): mangled extraction -- the rest of this header comment,
   braces, the ind local and the final return are elided here.  */
9345 neon_struct_mem_operand (rtx op
)
9349 /* Reject eliminable registers. */
9350 if (! (reload_in_progress
|| reload_completed
)
9351 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
9352 || reg_mentioned_p (arg_pointer_rtx
, op
)
9353 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
9354 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
9355 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
9356 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
9359 /* Constants are converted into offsets from labels. */
9360 if (GET_CODE (op
) != MEM
)
/* After reload, label-relative addresses (constant-pool entries) are
   acceptable: plain LABEL_REF or (const (plus (label_ref) (const_int))).  */
9365 if (reload_completed
9366 && (GET_CODE (ind
) == LABEL_REF
9367 || (GET_CODE (ind
) == CONST
9368 && GET_CODE (XEXP (ind
, 0)) == PLUS
9369 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
9370 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
9373 /* Match: (mem (reg)). */
9374 if (GET_CODE (ind
) == REG
)
9375 return arm_address_register_rtx_p (ind
, 0);
9377 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
9378 if (GET_CODE (ind
) == POST_INC
9379 || GET_CODE (ind
) == PRE_DEC
)
9380 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
9385 /* Return true if X is a register that will be eliminated later on. */
/* The frame pointer, arg pointer and the virtual registers are all replaced
   by offsets from a hard register during register elimination / vreg
   instantiation, so addresses built on them are not yet final.  */
9387 arm_eliminable_register (rtx x
)
9389 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
9390 || REGNO (x
) == ARG_POINTER_REGNUM
9391 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
9392 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
9395 /* Return GENERAL_REGS if a scratch register required to reload x to/from
9396 coprocessor registers. Otherwise return NO_REGS. */
/* NOTE(review): mangled extraction -- braces, the enclosing mode tests
   (e.g. the HFmode and TARGET_NEON conditions) and the NO_REGS returns are
   elided here; confirm control flow against the unmangled arm.c.  */
9399 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
/* Without NEON FP16 support a scratch core register is always needed for
   this case.  */
9403 if (!TARGET_NEON_FP16
)
9404 return GENERAL_REGS
;
9405 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
9407 return GENERAL_REGS
;
9410 /* The neon move patterns handle all legitimate vector and struct
9413 && (MEM_P (x
) || GET_CODE (x
) == CONST_VECTOR
)
9414 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
9415 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
9416 || VALID_NEON_STRUCT_MODE (mode
)))
/* Anything the coprocessor can address directly, or that is already in a
   core/coprocessor register, needs no scratch.  */
9419 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
9422 return GENERAL_REGS
;
9425 /* Values which must be returned in the most-significant end of the return
/* NOTE(review): mangled extraction -- the rest of this comment, the return
   type, braces and at least one conjunct of the condition (original line
   ~9432, presumably an endianness test) are elided here; confirm against
   the unmangled arm.c.  */
9429 arm_return_in_msb (const_tree valtype
)
/* Only AAPCS-based targets apply this rule, and only for aggregate,
   complex and fixed-point return types.  */
9431 return (TARGET_AAPCS_BASED
9433 && (AGGREGATE_TYPE_P (valtype
)
9434 || TREE_CODE (valtype
) == COMPLEX_TYPE
9435 || FIXED_POINT_TYPE_P (valtype
)));
9438 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
9439 Use by the Cirrus Maverick code which has to workaround
9440 a hardware bug triggered by such instructions. */
9442 arm_memory_load_p (rtx insn
)
/* NOTE(review): stray double semicolon after this declaration (original
   line 9444) -- harmless empty declaration, but should be cleaned up.  */
9444 rtx body
, lhs
, rhs
;;
9446 if (insn
== NULL_RTX
|| GET_CODE (insn
) != INSN
)
9449 body
= PATTERN (insn
);
/* Only simple single-set patterns are of interest.  */
9451 if (GET_CODE (body
) != SET
)
9454 lhs
= XEXP (body
, 0);
9455 rhs
= XEXP (body
, 1);
/* Look through a SUBREG wrapper to the underlying register.  */
9457 lhs
= REG_OR_SUBREG_RTX (lhs
);
9459 /* If the destination is not a general purpose
9460 register we do not have to worry. */
9461 if (GET_CODE (lhs
) != REG
9462 || REGNO_REG_CLASS (REGNO (lhs
)) != GENERAL_REGS
)
9465 /* As well as loads from memory we also have to react
9466 to loads of invalid constants which will be turned
9467 into loads from the minipool. */
9468 return (GET_CODE (rhs
) == MEM
9469 || GET_CODE (rhs
) == SYMBOL_REF
9470 || note_invalid_constants (insn
, -1, false));
9473 /* Return TRUE if INSN is a Cirrus instruction. */
/* NOTE(review): mangled extraction -- braces, the first operand of the
   guard condition (original line ~9480) and an early return are elided.  */
9475 arm_cirrus_insn_p (rtx insn
)
9477 enum attr_cirrus attr
;
9479 /* get_attr cannot accept USE or CLOBBER. */
9481 || GET_CODE (insn
) != INSN
9482 || GET_CODE (PATTERN (insn
)) == USE
9483 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
/* Query the machine-description "cirrus" attribute for this insn.  */
9486 attr
= get_attr_cirrus (insn
);
/* Anything other than CIRRUS_NOT counts as a Cirrus insn.  */
9488 return attr
!= CIRRUS_NOT
;
9491 /* Cirrus reorg for invalid instruction combinations. */
9493 cirrus_reorg (rtx first
)
9495 enum attr_cirrus attr
;
9496 rtx body
= PATTERN (first
);
9500 /* Any branch must be followed by 2 non Cirrus instructions. */
9501 if (GET_CODE (first
) == JUMP_INSN
&& GET_CODE (body
) != RETURN
)
9504 t
= next_nonnote_insn (first
);
9506 if (arm_cirrus_insn_p (t
))
9509 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9513 emit_insn_after (gen_nop (), first
);
9518 /* (float (blah)) is in parallel with a clobber. */
9519 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
9520 body
= XVECEXP (body
, 0, 0);
9522 if (GET_CODE (body
) == SET
)
9524 rtx lhs
= XEXP (body
, 0), rhs
= XEXP (body
, 1);
9526 /* cfldrd, cfldr64, cfstrd, cfstr64 must
9527 be followed by a non Cirrus insn. */
9528 if (get_attr_cirrus (first
) == CIRRUS_DOUBLE
)
9530 if (arm_cirrus_insn_p (next_nonnote_insn (first
)))
9531 emit_insn_after (gen_nop (), first
);
9535 else if (arm_memory_load_p (first
))
9537 unsigned int arm_regno
;
9539 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
9540 ldr/cfmv64hr combination where the Rd field is the same
9541 in both instructions must be split with a non Cirrus
9548 /* Get Arm register number for ldr insn. */
9549 if (GET_CODE (lhs
) == REG
)
9550 arm_regno
= REGNO (lhs
);
9553 gcc_assert (GET_CODE (rhs
) == REG
);
9554 arm_regno
= REGNO (rhs
);
9558 first
= next_nonnote_insn (first
);
9560 if (! arm_cirrus_insn_p (first
))
9563 body
= PATTERN (first
);
9565 /* (float (blah)) is in parallel with a clobber. */
9566 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0))
9567 body
= XVECEXP (body
, 0, 0);
9569 if (GET_CODE (body
) == FLOAT
)
9570 body
= XEXP (body
, 0);
9572 if (get_attr_cirrus (first
) == CIRRUS_MOVE
9573 && GET_CODE (XEXP (body
, 1)) == REG
9574 && arm_regno
== REGNO (XEXP (body
, 1)))
9575 emit_insn_after (gen_nop (), first
);
9581 /* get_attr cannot accept USE or CLOBBER. */
9583 || GET_CODE (first
) != INSN
9584 || GET_CODE (PATTERN (first
)) == USE
9585 || GET_CODE (PATTERN (first
)) == CLOBBER
)
9588 attr
= get_attr_cirrus (first
);
9590 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
9591 must be followed by a non-coprocessor instruction. */
9592 if (attr
== CIRRUS_COMPARE
)
9596 t
= next_nonnote_insn (first
);
9598 if (arm_cirrus_insn_p (t
))
9601 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
9605 emit_insn_after (gen_nop (), first
);
9611 /* Return TRUE if X references a SYMBOL_REF. */
/* NOTE(review): mangled extraction -- braces, the fmt/i/j declarations,
   the fmt[i] == 'E' test guarding the vector walk, and the returns are
   elided here.  */
9613 symbol_mentioned_p (rtx x
)
9618 if (GET_CODE (x
) == SYMBOL_REF
)
9621 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
9622 are constant offsets, not symbols. */
9623 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
/* Recursive walk over X's operands, driven by the rtx format string:
   vector operands are scanned element by element, 'e' operands recursed
   into directly.  */
9626 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9628 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9634 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9635 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
9638 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
9645 /* Return TRUE if X references a LABEL_REF. */
/* Structure mirrors symbol_mentioned_p above; the same extraction elisions
   (braces, declarations, the 'E' test, returns) apply here.  */
9647 label_mentioned_p (rtx x
)
9652 if (GET_CODE (x
) == LABEL_REF
)
9655 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
9656 instruction, but they are constant offsets, not symbols. */
9657 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
/* Format-string driven recursive walk, as in symbol_mentioned_p.  */
9660 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
9661 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
9667 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
9668 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
9671 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
/* Return nonzero if X mentions a thread-local (UNSPEC_TLS) reference.
   NOTE(review): mangled extraction -- the switch's case labels (presumably
   CONST and UNSPEC), braces, and the returns are elided here; confirm
   against the unmangled arm.c.  */
9679 tls_mentioned_p (rtx x
)
9681 switch (GET_CODE (x
))
/* Look through the wrapper to the wrapped expression.  */
9684 return tls_mentioned_p (XEXP (x
, 0));
9687 if (XINT (x
, 1) == UNSPEC_TLS
)
9695 /* Must not copy any rtx that uses a pc-relative address. */
/* for_each_rtx callback (see arm_cannot_copy_insn_p below).
   NOTE(review): parameter "date" looks like a typo for "data"; it is unused
   (ATTRIBUTE_UNUSED) so this is cosmetic only.  The return statements are
   elided by the extraction.  */
9698 arm_note_pic_base (rtx
*x
, void *date ATTRIBUTE_UNUSED
)
/* Flag any UNSPEC_PIC_BASE node, i.e. a pc-relative base computation.  */
9700 if (GET_CODE (*x
) == UNSPEC
9701 && XINT (*x
, 1) == UNSPEC_PIC_BASE
)
/* Return nonzero if INSN must not be duplicated (e.g. by unrolling).  */
9707 arm_cannot_copy_insn_p (rtx insn
)
9709 /* The tls call insn cannot be copied, as it is paired with a data
/* (remainder of this comment elided by the extraction)  */
9711 if (recog_memoized (insn
) == CODE_FOR_tlscall
)
/* Otherwise, scan the whole pattern for pc-relative (UNSPEC_PIC_BASE)
   uses via the arm_note_pic_base callback above.  */
9714 return for_each_rtx (&PATTERN (insn
), arm_note_pic_base
, NULL
);
9720 enum rtx_code code
= GET_CODE (x
);
9737 /* Return 1 if memory locations are adjacent. */
9739 adjacent_mem_locations (rtx a
, rtx b
)
9741 /* We don't guarantee to preserve the order of these memory refs. */
9742 if (volatile_refs_p (a
) || volatile_refs_p (b
))
9745 if ((GET_CODE (XEXP (a
, 0)) == REG
9746 || (GET_CODE (XEXP (a
, 0)) == PLUS
9747 && GET_CODE (XEXP (XEXP (a
, 0), 1)) == CONST_INT
))
9748 && (GET_CODE (XEXP (b
, 0)) == REG
9749 || (GET_CODE (XEXP (b
, 0)) == PLUS
9750 && GET_CODE (XEXP (XEXP (b
, 0), 1)) == CONST_INT
)))
9752 HOST_WIDE_INT val0
= 0, val1
= 0;
9756 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
9758 reg0
= XEXP (XEXP (a
, 0), 0);
9759 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
9764 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
9766 reg1
= XEXP (XEXP (b
, 0), 0);
9767 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
9772 /* Don't accept any offset that will require multiple
9773 instructions to handle, since this would cause the
9774 arith_adjacentmem pattern to output an overlong sequence. */
9775 if (!const_ok_for_op (val0
, PLUS
) || !const_ok_for_op (val1
, PLUS
))
9778 /* Don't allow an eliminable register: register elimination can make
9779 the offset too large. */
9780 if (arm_eliminable_register (reg0
))
9783 val_diff
= val1
- val0
;
9787 /* If the target has load delay slots, then there's no benefit
9788 to using an ldm instruction unless the offset is zero and
9789 we are optimizing for size. */
9790 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
9791 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
9792 && (val_diff
== 4 || val_diff
== -4));
9795 return ((REGNO (reg0
) == REGNO (reg1
))
9796 && (val_diff
== 4 || val_diff
== -4));
9802 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9803 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9804 instruction. ADD_OFFSET is nonzero if the base address register needs
9805 to be modified with an add instruction before we can use it. */
/* NOTE(review): mangled extraction -- braces and the return statements
   (false after each rejection test, true at the end) are elided here.  */
9808 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED
,
9809 int nops
, HOST_WIDE_INT add_offset
)
9811 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9812 if the offset isn't small enough. The reason 2 ldrs are faster
9813 is because these ARMs are able to do more than one cache access
9814 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9815 whilst the ARM8 has a double bandwidth cache. This means that
9816 these cores can do both an instruction fetch and a data fetch in
9817 a single cycle, so the trick of calculating the address into a
9818 scratch register (one of the result regs) and then doing a load
9819 multiple actually becomes slower (and no smaller in code size).
9820 That is the transformation
9822 ldr rd1, [rbase + offset]
9823 ldr rd2, [rbase + offset + 4]
9827 add rd1, rbase, offset
9828 ldmia rd1, {rd1, rd2}
9830 produces worse code -- '3 cycles + any stalls on rd2' instead of
9831 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9832 access per cycle, the first sequence could never complete in less
9833 than 6 cycles, whereas the ldm sequence would only take 5 and
9834 would make better use of sequential accesses if not hitting the
9837 We cheat here and test 'arm_ld_sched' which we currently know to
9838 only be true for the ARM8, ARM9 and StrongARM. If this ever
9839 changes, then the test below needs to be reworked. */
/* Reject a 2-op ldm that also needs a preparatory add on ld_sched cores.  */
9840 if (nops
== 2 && arm_ld_sched
&& add_offset
!= 0)
9843 /* XScale has load-store double instructions, but they have stricter
9844 alignment requirements than load-store multiple, so we cannot
9847 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9848 the pipeline until completion.
9856 An ldr instruction takes 1-3 cycles, but does not block the
9865 Best case ldr will always win. However, the more ldr instructions
9866 we issue, the less likely we are to be able to schedule them well.
9867 Using ldr instructions also increases code size.
9869 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9870 for counts of 3 or 4 regs. */
/* On XScale (when not optimizing for size), prefer separate ldrs for one
   or two registers.  */
9871 if (nops
<= 2 && arm_tune_xscale
&& !optimize_size
)
9876 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9877 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9878 an array ORDER which describes the sequence to use when accessing the
9879 offsets that produces an ascending order. In this sequence, each
9880 offset must be larger by exactly 4 than the previous one. ORDER[0]
9881 must have been filled in with the lowest offset by the caller.
9882 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9883 we use to verify that ORDER produces an ascending order of registers.
9884 Return true if it was possible to construct such an order, false if
9888 compute_offset_order (int nops
, HOST_WIDE_INT
*unsorted_offsets
, int *order
,
9892 for (i
= 1; i
< nops
; i
++)
9896 order
[i
] = order
[i
- 1];
9897 for (j
= 0; j
< nops
; j
++)
9898 if (unsorted_offsets
[j
] == unsorted_offsets
[order
[i
- 1]] + 4)
9900 /* We must find exactly one offset that is higher than the
9901 previous one by 4. */
9902 if (order
[i
] != order
[i
- 1])
9906 if (order
[i
] == order
[i
- 1])
9908 /* The register numbers must be ascending. */
9909 if (unsorted_regs
!= NULL
9910 && unsorted_regs
[order
[i
]] <= unsorted_regs
[order
[i
- 1]])
9916 /* Used to determine in a peephole whether a sequence of load
9917 instructions can be changed into a load-multiple instruction.
9918 NOPS is the number of separate load instructions we are examining. The
9919 first NOPS entries in OPERANDS are the destination registers, the
9920 next NOPS entries are memory operands. If this function is
9921 successful, *BASE is set to the common base register of the memory
9922 accesses; *LOAD_OFFSET is set to the first memory location's offset
9923 from that base register.
9924 REGS is an array filled in with the destination register numbers.
9925 SAVED_ORDER (if nonnull), is an array filled in with an order that maps
9926 insn numbers to an ascending order of stores. If CHECK_REGS is true,
9927 the sequence of registers in REGS matches the loads from ascending memory
9928 locations, and the function verifies that the register numbers are
9929 themselves ascending. If CHECK_REGS is false, the register numbers
9930 are stored in the order they are found in the operands. */
9932 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *saved_order
,
9933 int *base
, HOST_WIDE_INT
*load_offset
, bool check_regs
)
9935 int unsorted_regs
[MAX_LDM_STM_OPS
];
9936 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
9937 int order
[MAX_LDM_STM_OPS
];
9938 rtx base_reg_rtx
= NULL
;
9942 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9943 easily extended if required. */
9944 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
9946 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
9948 /* Loop over the operands and check that the memory references are
9949 suitable (i.e. immediate offsets from the same base register). At
9950 the same time, extract the target register, and the memory
9952 for (i
= 0; i
< nops
; i
++)
9957 /* Convert a subreg of a mem into the mem itself. */
9958 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9959 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9961 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9963 /* Don't reorder volatile memory references; it doesn't seem worth
9964 looking for the case where the order is ok anyway. */
9965 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9968 offset
= const0_rtx
;
9970 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9971 || (GET_CODE (reg
) == SUBREG
9972 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9973 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9974 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9976 || (GET_CODE (reg
) == SUBREG
9977 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9978 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9983 base_reg
= REGNO (reg
);
9985 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
9988 else if (base_reg
!= (int) REGNO (reg
))
9989 /* Not addressed from the same base register. */
9992 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9993 ? REGNO (operands
[i
])
9994 : REGNO (SUBREG_REG (operands
[i
])));
9996 /* If it isn't an integer register, or if it overwrites the
9997 base register but isn't the last insn in the list, then
9998 we can't do this. */
9999 if (unsorted_regs
[i
] < 0
10000 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
10001 || unsorted_regs
[i
] > 14
10002 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
10005 unsorted_offsets
[i
] = INTVAL (offset
);
10006 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
10010 /* Not a suitable memory address. */
10014 /* All the useful information has now been extracted from the
10015 operands into unsorted_regs and unsorted_offsets; additionally,
10016 order[0] has been set to the lowest offset in the list. Sort
10017 the offsets into order, verifying that they are adjacent, and
10018 check that the register numbers are ascending. */
10019 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
10020 check_regs
? unsorted_regs
: NULL
))
10024 memcpy (saved_order
, order
, sizeof order
);
10030 for (i
= 0; i
< nops
; i
++)
10031 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
10033 *load_offset
= unsorted_offsets
[order
[0]];
10037 && !peep2_reg_dead_p (nops
, base_reg_rtx
))
10040 if (unsorted_offsets
[order
[0]] == 0)
10041 ldm_case
= 1; /* ldmia */
10042 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
10043 ldm_case
= 2; /* ldmib */
10044 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
10045 ldm_case
= 3; /* ldmda */
10046 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
10047 ldm_case
= 4; /* ldmdb */
10048 else if (const_ok_for_arm (unsorted_offsets
[order
[0]])
10049 || const_ok_for_arm (-unsorted_offsets
[order
[0]]))
10054 if (!multiple_operation_profitable_p (false, nops
,
10056 ? unsorted_offsets
[order
[0]] : 0))
10062 /* Used to determine in a peephole whether a sequence of store instructions can
10063 be changed into a store-multiple instruction.
10064 NOPS is the number of separate store instructions we are examining.
10065 NOPS_TOTAL is the total number of instructions recognized by the peephole
10067 The first NOPS entries in OPERANDS are the source registers, the next
10068 NOPS entries are memory operands. If this function is successful, *BASE is
10069 set to the common base register of the memory accesses; *LOAD_OFFSET is set
10070 to the first memory location's offset from that base register. REGS is an
10071 array filled in with the source register numbers, REG_RTXS (if nonnull) is
10072 likewise filled with the corresponding rtx's.
10073 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
10074 numbers to an ascending order of stores.
10075 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
10076 from ascending memory locations, and the function verifies that the register
10077 numbers are themselves ascending. If CHECK_REGS is false, the register
10078 numbers are stored in the order they are found in the operands. */
10080 store_multiple_sequence (rtx
*operands
, int nops
, int nops_total
,
10081 int *regs
, rtx
*reg_rtxs
, int *saved_order
, int *base
,
10082 HOST_WIDE_INT
*load_offset
, bool check_regs
)
10084 int unsorted_regs
[MAX_LDM_STM_OPS
];
10085 rtx unsorted_reg_rtxs
[MAX_LDM_STM_OPS
];
10086 HOST_WIDE_INT unsorted_offsets
[MAX_LDM_STM_OPS
];
10087 int order
[MAX_LDM_STM_OPS
];
10089 rtx base_reg_rtx
= NULL
;
10092 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
10093 easily extended if required. */
10094 gcc_assert (nops
>= 2 && nops
<= MAX_LDM_STM_OPS
);
10096 memset (order
, 0, MAX_LDM_STM_OPS
* sizeof (int));
10098 /* Loop over the operands and check that the memory references are
10099 suitable (i.e. immediate offsets from the same base register). At
10100 the same time, extract the target register, and the memory
10102 for (i
= 0; i
< nops
; i
++)
10107 /* Convert a subreg of a mem into the mem itself. */
10108 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
10109 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
10111 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
10113 /* Don't reorder volatile memory references; it doesn't seem worth
10114 looking for the case where the order is ok anyway. */
10115 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
10118 offset
= const0_rtx
;
10120 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
10121 || (GET_CODE (reg
) == SUBREG
10122 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
10123 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
10124 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
10126 || (GET_CODE (reg
) == SUBREG
10127 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
10128 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
10131 unsorted_reg_rtxs
[i
] = (GET_CODE (operands
[i
]) == REG
10132 ? operands
[i
] : SUBREG_REG (operands
[i
]));
10133 unsorted_regs
[i
] = REGNO (unsorted_reg_rtxs
[i
]);
10137 base_reg
= REGNO (reg
);
10138 base_reg_rtx
= reg
;
10139 if (TARGET_THUMB1
&& base_reg
> LAST_LO_REGNUM
)
10142 else if (base_reg
!= (int) REGNO (reg
))
10143 /* Not addressed from the same base register. */
10146 /* If it isn't an integer register, then we can't do this. */
10147 if (unsorted_regs
[i
] < 0
10148 || (TARGET_THUMB1
&& unsorted_regs
[i
] > LAST_LO_REGNUM
)
10149 || (TARGET_THUMB2
&& unsorted_regs
[i
] == base_reg
)
10150 || (TARGET_THUMB2
&& unsorted_regs
[i
] == SP_REGNUM
)
10151 || unsorted_regs
[i
] > 14)
10154 unsorted_offsets
[i
] = INTVAL (offset
);
10155 if (i
== 0 || unsorted_offsets
[i
] < unsorted_offsets
[order
[0]])
10159 /* Not a suitable memory address. */
10163 /* All the useful information has now been extracted from the
10164 operands into unsorted_regs and unsorted_offsets; additionally,
10165 order[0] has been set to the lowest offset in the list. Sort
10166 the offsets into order, verifying that they are adjacent, and
10167 check that the register numbers are ascending. */
10168 if (!compute_offset_order (nops
, unsorted_offsets
, order
,
10169 check_regs
? unsorted_regs
: NULL
))
10173 memcpy (saved_order
, order
, sizeof order
);
10179 for (i
= 0; i
< nops
; i
++)
10181 regs
[i
] = unsorted_regs
[check_regs
? order
[i
] : i
];
10183 reg_rtxs
[i
] = unsorted_reg_rtxs
[check_regs
? order
[i
] : i
];
10186 *load_offset
= unsorted_offsets
[order
[0]];
10190 && !peep2_reg_dead_p (nops_total
, base_reg_rtx
))
10193 if (unsorted_offsets
[order
[0]] == 0)
10194 stm_case
= 1; /* stmia */
10195 else if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
10196 stm_case
= 2; /* stmib */
10197 else if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
10198 stm_case
= 3; /* stmda */
10199 else if (TARGET_32BIT
&& unsorted_offsets
[order
[nops
- 1]] == -4)
10200 stm_case
= 4; /* stmdb */
10204 if (!multiple_operation_profitable_p (false, nops
, 0))
10210 /* Routines for use in generating RTL. */
10212 /* Generate a load-multiple instruction. COUNT is the number of loads in
10213 the instruction; REGS and MEMS are arrays containing the operands.
10214 BASEREG is the base register to be used in addressing the memory operands.
10215 WBACK_OFFSET is nonzero if the instruction should update the base
10219 arm_gen_load_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
10220 HOST_WIDE_INT wback_offset
)
10225 if (!multiple_operation_profitable_p (false, count
, 0))
10231 for (i
= 0; i
< count
; i
++)
10232 emit_move_insn (gen_rtx_REG (SImode
, regs
[i
]), mems
[i
]);
10234 if (wback_offset
!= 0)
10235 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
10237 seq
= get_insns ();
10243 result
= gen_rtx_PARALLEL (VOIDmode
,
10244 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
10245 if (wback_offset
!= 0)
10247 XVECEXP (result
, 0, 0)
10248 = gen_rtx_SET (VOIDmode
, basereg
,
10249 plus_constant (basereg
, wback_offset
));
10254 for (j
= 0; i
< count
; i
++, j
++)
10255 XVECEXP (result
, 0, i
)
10256 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, regs
[j
]), mems
[j
]);
10261 /* Generate a store-multiple instruction. COUNT is the number of stores in
10262 the instruction; REGS and MEMS are arrays containing the operands.
10263 BASEREG is the base register to be used in addressing the memory operands.
10264 WBACK_OFFSET is nonzero if the instruction should update the base
10268 arm_gen_store_multiple_1 (int count
, int *regs
, rtx
*mems
, rtx basereg
,
10269 HOST_WIDE_INT wback_offset
)
10274 if (GET_CODE (basereg
) == PLUS
)
10275 basereg
= XEXP (basereg
, 0);
10277 if (!multiple_operation_profitable_p (false, count
, 0))
10283 for (i
= 0; i
< count
; i
++)
10284 emit_move_insn (mems
[i
], gen_rtx_REG (SImode
, regs
[i
]));
10286 if (wback_offset
!= 0)
10287 emit_move_insn (basereg
, plus_constant (basereg
, wback_offset
));
10289 seq
= get_insns ();
10295 result
= gen_rtx_PARALLEL (VOIDmode
,
10296 rtvec_alloc (count
+ (wback_offset
!= 0 ? 1 : 0)));
10297 if (wback_offset
!= 0)
10299 XVECEXP (result
, 0, 0)
10300 = gen_rtx_SET (VOIDmode
, basereg
,
10301 plus_constant (basereg
, wback_offset
));
10306 for (j
= 0; i
< count
; i
++, j
++)
10307 XVECEXP (result
, 0, i
)
10308 = gen_rtx_SET (VOIDmode
, mems
[j
], gen_rtx_REG (SImode
, regs
[j
]));
10313 /* Generate either a load-multiple or a store-multiple instruction. This
10314 function can be used in situations where we can start with a single MEM
10315 rtx and adjust its address upwards.
10316 COUNT is the number of operations in the instruction, not counting a
10317 possible update of the base register. REGS is an array containing the
10319 BASEREG is the base register to be used in addressing the memory operands,
10320 which are constructed from BASEMEM.
10321 WRITE_BACK specifies whether the generated instruction should include an
10322 update of the base register.
10323 OFFSETP is used to pass an offset to and from this function; this offset
10324 is not used when constructing the address (instead BASEMEM should have an
10325 appropriate offset in its address), it is used only for setting
10326 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/
10329 arm_gen_multiple_op (bool is_load
, int *regs
, int count
, rtx basereg
,
10330 bool write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
10332 rtx mems
[MAX_LDM_STM_OPS
];
10333 HOST_WIDE_INT offset
= *offsetp
;
10336 gcc_assert (count
<= MAX_LDM_STM_OPS
);
10338 if (GET_CODE (basereg
) == PLUS
)
10339 basereg
= XEXP (basereg
, 0);
10341 for (i
= 0; i
< count
; i
++)
10343 rtx addr
= plus_constant (basereg
, i
* 4);
10344 mems
[i
] = adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
10352 return arm_gen_load_multiple_1 (count
, regs
, mems
, basereg
,
10353 write_back
? 4 * count
: 0);
10355 return arm_gen_store_multiple_1 (count
, regs
, mems
, basereg
,
10356 write_back
? 4 * count
: 0);
10360 arm_gen_load_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10361 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10363 return arm_gen_multiple_op (TRUE
, regs
, count
, basereg
, write_back
, basemem
,
10368 arm_gen_store_multiple (int *regs
, int count
, rtx basereg
, int write_back
,
10369 rtx basemem
, HOST_WIDE_INT
*offsetp
)
10371 return arm_gen_multiple_op (FALSE
, regs
, count
, basereg
, write_back
, basemem
,
10375 /* Called from a peephole2 expander to turn a sequence of loads into an
10376 LDM instruction. OPERANDS are the operands found by the peephole matcher;
10377 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
10378 is true if we can reorder the registers because they are used commutatively
10380 Returns true iff we could generate a new instruction. */
10383 gen_ldm_seq (rtx
*operands
, int nops
, bool sort_regs
)
10385 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10386 rtx mems
[MAX_LDM_STM_OPS
];
10387 int i
, j
, base_reg
;
10389 HOST_WIDE_INT offset
;
10390 int write_back
= FALSE
;
10394 ldm_case
= load_multiple_sequence (operands
, nops
, regs
, mem_order
,
10395 &base_reg
, &offset
, !sort_regs
);
10401 for (i
= 0; i
< nops
- 1; i
++)
10402 for (j
= i
+ 1; j
< nops
; j
++)
10403 if (regs
[i
] > regs
[j
])
10409 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10413 gcc_assert (peep2_reg_dead_p (nops
, base_reg_rtx
));
10414 gcc_assert (ldm_case
== 1 || ldm_case
== 5);
10420 rtx newbase
= TARGET_THUMB1
? base_reg_rtx
: gen_rtx_REG (SImode
, regs
[0]);
10421 emit_insn (gen_addsi3 (newbase
, base_reg_rtx
, GEN_INT (offset
)));
10423 if (!TARGET_THUMB1
)
10425 base_reg
= regs
[0];
10426 base_reg_rtx
= newbase
;
10430 for (i
= 0; i
< nops
; i
++)
10432 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10433 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10436 emit_insn (arm_gen_load_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10437 write_back
? offset
+ i
* 4 : 0));
10441 /* Called from a peephole2 expander to turn a sequence of stores into an
10442 STM instruction. OPERANDS are the operands found by the peephole matcher;
10443 NOPS indicates how many separate stores we are trying to combine.
10444 Returns true iff we could generate a new instruction. */
10447 gen_stm_seq (rtx
*operands
, int nops
)
10450 int regs
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10451 rtx mems
[MAX_LDM_STM_OPS
];
10454 HOST_WIDE_INT offset
;
10455 int write_back
= FALSE
;
10458 bool base_reg_dies
;
10460 stm_case
= store_multiple_sequence (operands
, nops
, nops
, regs
, NULL
,
10461 mem_order
, &base_reg
, &offset
, true);
10466 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10468 base_reg_dies
= peep2_reg_dead_p (nops
, base_reg_rtx
);
10471 gcc_assert (base_reg_dies
);
10477 gcc_assert (base_reg_dies
);
10478 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10482 addr
= plus_constant (base_reg_rtx
, offset
);
10484 for (i
= 0; i
< nops
; i
++)
10486 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10487 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10490 emit_insn (arm_gen_store_multiple_1 (nops
, regs
, mems
, base_reg_rtx
,
10491 write_back
? offset
+ i
* 4 : 0));
10495 /* Called from a peephole2 expander to turn a sequence of stores that are
10496 preceded by constant loads into an STM instruction. OPERANDS are the
10497 operands found by the peephole matcher; NOPS indicates how many
10498 separate stores we are trying to combine; there are 2 * NOPS
10499 instructions in the peephole.
10500 Returns true iff we could generate a new instruction. */
10503 gen_const_stm_seq (rtx
*operands
, int nops
)
10505 int regs
[MAX_LDM_STM_OPS
], sorted_regs
[MAX_LDM_STM_OPS
];
10506 int reg_order
[MAX_LDM_STM_OPS
], mem_order
[MAX_LDM_STM_OPS
];
10507 rtx reg_rtxs
[MAX_LDM_STM_OPS
], orig_reg_rtxs
[MAX_LDM_STM_OPS
];
10508 rtx mems
[MAX_LDM_STM_OPS
];
10511 HOST_WIDE_INT offset
;
10512 int write_back
= FALSE
;
10515 bool base_reg_dies
;
10517 HARD_REG_SET allocated
;
10519 stm_case
= store_multiple_sequence (operands
, nops
, 2 * nops
, regs
, reg_rtxs
,
10520 mem_order
, &base_reg
, &offset
, false);
10525 memcpy (orig_reg_rtxs
, reg_rtxs
, sizeof orig_reg_rtxs
);
10527 /* If the same register is used more than once, try to find a free
10529 CLEAR_HARD_REG_SET (allocated
);
10530 for (i
= 0; i
< nops
; i
++)
10532 for (j
= i
+ 1; j
< nops
; j
++)
10533 if (regs
[i
] == regs
[j
])
10535 rtx t
= peep2_find_free_register (0, nops
* 2,
10536 TARGET_THUMB1
? "l" : "r",
10537 SImode
, &allocated
);
10541 regs
[i
] = REGNO (t
);
10545 /* Compute an ordering that maps the register numbers to an ascending
10548 for (i
= 0; i
< nops
; i
++)
10549 if (regs
[i
] < regs
[reg_order
[0]])
10552 for (i
= 1; i
< nops
; i
++)
10554 int this_order
= reg_order
[i
- 1];
10555 for (j
= 0; j
< nops
; j
++)
10556 if (regs
[j
] > regs
[reg_order
[i
- 1]]
10557 && (this_order
== reg_order
[i
- 1]
10558 || regs
[j
] < regs
[this_order
]))
10560 reg_order
[i
] = this_order
;
10563 /* Ensure that registers that must be live after the instruction end
10564 up with the correct value. */
10565 for (i
= 0; i
< nops
; i
++)
10567 int this_order
= reg_order
[i
];
10568 if ((this_order
!= mem_order
[i
]
10569 || orig_reg_rtxs
[this_order
] != reg_rtxs
[this_order
])
10570 && !peep2_reg_dead_p (nops
* 2, orig_reg_rtxs
[this_order
]))
10574 /* Load the constants. */
10575 for (i
= 0; i
< nops
; i
++)
10577 rtx op
= operands
[2 * nops
+ mem_order
[i
]];
10578 sorted_regs
[i
] = regs
[reg_order
[i
]];
10579 emit_move_insn (reg_rtxs
[reg_order
[i
]], op
);
10582 base_reg_rtx
= gen_rtx_REG (Pmode
, base_reg
);
10584 base_reg_dies
= peep2_reg_dead_p (nops
* 2, base_reg_rtx
);
10587 gcc_assert (base_reg_dies
);
10593 gcc_assert (base_reg_dies
);
10594 emit_insn (gen_addsi3 (base_reg_rtx
, base_reg_rtx
, GEN_INT (offset
)));
10598 addr
= plus_constant (base_reg_rtx
, offset
);
10600 for (i
= 0; i
< nops
; i
++)
10602 addr
= plus_constant (base_reg_rtx
, offset
+ i
* 4);
10603 mems
[i
] = adjust_automodify_address_nv (operands
[nops
+ mem_order
[i
]],
10606 emit_insn (arm_gen_store_multiple_1 (nops
, sorted_regs
, mems
, base_reg_rtx
,
10607 write_back
? offset
+ i
* 4 : 0));
10612 arm_gen_movmemqi (rtx
*operands
)
10614 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
10615 HOST_WIDE_INT srcoffset
, dstoffset
;
10617 rtx src
, dst
, srcbase
, dstbase
;
10618 rtx part_bytes_reg
= NULL
;
10621 if (GET_CODE (operands
[2]) != CONST_INT
10622 || GET_CODE (operands
[3]) != CONST_INT
10623 || INTVAL (operands
[2]) > 64
10624 || INTVAL (operands
[3]) & 3)
10627 dstbase
= operands
[0];
10628 srcbase
= operands
[1];
10630 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
10631 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
10633 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
10634 out_words_to_go
= INTVAL (operands
[2]) / 4;
10635 last_bytes
= INTVAL (operands
[2]) & 3;
10636 dstoffset
= srcoffset
= 0;
10638 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
10639 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
10641 for (i
= 0; in_words_to_go
>= 2; i
+=4)
10643 if (in_words_to_go
> 4)
10644 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, 4, src
,
10645 TRUE
, srcbase
, &srcoffset
));
10647 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence
, in_words_to_go
,
10648 src
, FALSE
, srcbase
,
10651 if (out_words_to_go
)
10653 if (out_words_to_go
> 4)
10654 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
, 4, dst
,
10655 TRUE
, dstbase
, &dstoffset
));
10656 else if (out_words_to_go
!= 1)
10657 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence
,
10658 out_words_to_go
, dst
,
10661 dstbase
, &dstoffset
));
10664 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10665 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
10666 if (last_bytes
!= 0)
10668 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
10674 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
10675 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
10678 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
10679 if (out_words_to_go
)
10683 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10684 sreg
= copy_to_reg (mem
);
10686 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
10687 emit_move_insn (mem
, sreg
);
10690 gcc_assert (!in_words_to_go
); /* Sanity check */
10693 if (in_words_to_go
)
10695 gcc_assert (in_words_to_go
> 0);
10697 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
10698 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
10701 gcc_assert (!last_bytes
|| part_bytes_reg
);
10703 if (BYTES_BIG_ENDIAN
&& last_bytes
)
10705 rtx tmp
= gen_reg_rtx (SImode
);
10707 /* The bytes we want are in the top end of the word. */
10708 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
10709 GEN_INT (8 * (4 - last_bytes
))));
10710 part_bytes_reg
= tmp
;
10714 mem
= adjust_automodify_address (dstbase
, QImode
,
10715 plus_constant (dst
, last_bytes
- 1),
10716 dstoffset
+ last_bytes
- 1);
10717 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10721 tmp
= gen_reg_rtx (SImode
);
10722 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
10723 part_bytes_reg
= tmp
;
10730 if (last_bytes
> 1)
10732 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
10733 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
10737 rtx tmp
= gen_reg_rtx (SImode
);
10738 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
10739 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
10740 part_bytes_reg
= tmp
;
10747 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
10748 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
10755 /* Select a dominance comparison mode if possible for a test of the general
10756 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
10757 COND_OR == DOM_CC_X_AND_Y => (X && Y)
10758 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
10759 COND_OR == DOM_CC_X_OR_Y => (X || Y)
10760 In all cases OP will be either EQ or NE, but we don't need to know which
10761 here. If we are unable to support a dominance comparison we return
10762 CC mode. This will then fail to match for the RTL expressions that
10763 generate this call. */
10765 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
10767 enum rtx_code cond1
, cond2
;
10770 /* Currently we will probably get the wrong result if the individual
10771 comparisons are not simple. This also ensures that it is safe to
10772 reverse a comparison if necessary. */
10773 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
10775 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
10779 /* The if_then_else variant of this tests the second condition if the
10780 first passes, but is true if the first fails. Reverse the first
10781 condition to get a true "inclusive-or" expression. */
10782 if (cond_or
== DOM_CC_NX_OR_Y
)
10783 cond1
= reverse_condition (cond1
);
10785 /* If the comparisons are not equal, and one doesn't dominate the other,
10786 then we can't do this. */
10788 && !comparison_dominates_p (cond1
, cond2
)
10789 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
10794 enum rtx_code temp
= cond1
;
10802 if (cond_or
== DOM_CC_X_AND_Y
)
10807 case EQ
: return CC_DEQmode
;
10808 case LE
: return CC_DLEmode
;
10809 case LEU
: return CC_DLEUmode
;
10810 case GE
: return CC_DGEmode
;
10811 case GEU
: return CC_DGEUmode
;
10812 default: gcc_unreachable ();
10816 if (cond_or
== DOM_CC_X_AND_Y
)
10828 gcc_unreachable ();
10832 if (cond_or
== DOM_CC_X_AND_Y
)
10844 gcc_unreachable ();
10848 if (cond_or
== DOM_CC_X_AND_Y
)
10849 return CC_DLTUmode
;
10854 return CC_DLTUmode
;
10856 return CC_DLEUmode
;
10860 gcc_unreachable ();
10864 if (cond_or
== DOM_CC_X_AND_Y
)
10865 return CC_DGTUmode
;
10870 return CC_DGTUmode
;
10872 return CC_DGEUmode
;
10876 gcc_unreachable ();
10879 /* The remaining cases only occur when both comparisons are the
10882 gcc_assert (cond1
== cond2
);
10886 gcc_assert (cond1
== cond2
);
10890 gcc_assert (cond1
== cond2
);
10894 gcc_assert (cond1
== cond2
);
10895 return CC_DLEUmode
;
10898 gcc_assert (cond1
== cond2
);
10899 return CC_DGEUmode
;
10902 gcc_unreachable ();
10907 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
10909 /* All floating point compares return CCFP if it is an equality
10910 comparison, and CCFPE otherwise. */
10911 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
10931 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
10936 gcc_unreachable ();
10940 /* A compare with a shifted operand. Because of canonicalization, the
10941 comparison will have to be swapped when we emit the assembler. */
10942 if (GET_MODE (y
) == SImode
10943 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10944 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10945 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
10946 || GET_CODE (x
) == ROTATERT
))
10949 /* This operation is performed swapped, but since we only rely on the Z
10950 flag we don't need an additional mode. */
10951 if (GET_MODE (y
) == SImode
10952 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
10953 && GET_CODE (x
) == NEG
10954 && (op
== EQ
|| op
== NE
))
10957 /* This is a special case that is used by combine to allow a
10958 comparison of a shifted byte load to be split into a zero-extend
10959 followed by a comparison of the shifted integer (only valid for
10960 equalities and unsigned inequalities). */
10961 if (GET_MODE (x
) == SImode
10962 && GET_CODE (x
) == ASHIFT
10963 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
10964 && GET_CODE (XEXP (x
, 0)) == SUBREG
10965 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
10966 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
10967 && (op
== EQ
|| op
== NE
10968 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
10969 && GET_CODE (y
) == CONST_INT
)
10972 /* A construct for a conditional compare, if the false arm contains
10973 0, then both conditions must be true, otherwise either condition
10974 must be true. Not all conditions are possible, so CCmode is
10975 returned if it can't be done. */
10976 if (GET_CODE (x
) == IF_THEN_ELSE
10977 && (XEXP (x
, 2) == const0_rtx
10978 || XEXP (x
, 2) == const1_rtx
)
10979 && COMPARISON_P (XEXP (x
, 0))
10980 && COMPARISON_P (XEXP (x
, 1)))
10981 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10982 INTVAL (XEXP (x
, 2)));
10984 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10985 if (GET_CODE (x
) == AND
10986 && (op
== EQ
|| op
== NE
)
10987 && COMPARISON_P (XEXP (x
, 0))
10988 && COMPARISON_P (XEXP (x
, 1)))
10989 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10992 if (GET_CODE (x
) == IOR
10993 && (op
== EQ
|| op
== NE
)
10994 && COMPARISON_P (XEXP (x
, 0))
10995 && COMPARISON_P (XEXP (x
, 1)))
10996 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10999 /* An operation (on Thumb) where we want to test for a single bit.
11000 This is done by shifting that bit up into the top bit of a
11001 scratch register; we can then branch on the sign bit. */
11003 && GET_MODE (x
) == SImode
11004 && (op
== EQ
|| op
== NE
)
11005 && GET_CODE (x
) == ZERO_EXTRACT
11006 && XEXP (x
, 1) == const1_rtx
)
11009 /* An operation that sets the condition codes as a side-effect, the
11010 V flag is not set correctly, so we can only use comparisons where
11011 this doesn't matter. (For LT and GE we can use "mi" and "pl"
11013 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
11014 if (GET_MODE (x
) == SImode
11016 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
11017 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
11018 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
11019 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
11020 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
11021 || GET_CODE (x
) == LSHIFTRT
11022 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
11023 || GET_CODE (x
) == ROTATERT
11024 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
11025 return CC_NOOVmode
;
11027 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
11030 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
11031 && GET_CODE (x
) == PLUS
11032 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
11035 if (GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
)
11037 /* To keep things simple, always use the Cirrus cfcmp64 if it is
11039 if (TARGET_ARM
&& TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
11046 /* A DImode comparison against zero can be implemented by
11047 or'ing the two halves together. */
11048 if (y
== const0_rtx
)
11051 /* We can do an equality test in three Thumb instructions. */
11061 /* DImode unsigned comparisons can be implemented by cmp +
11062 cmpeq without a scratch register. Not worth doing in
11073 /* DImode signed and unsigned comparisons can be implemented
11074 by cmp + sbcs with a scratch register, but that does not
11075 set the Z flag - we must reverse GT/LE/GTU/LEU. */
11076 gcc_assert (op
!= EQ
&& op
!= NE
);
11080 gcc_unreachable ();
11087 /* X and Y are two things to compare using CODE. Emit the compare insn and
11088 return the rtx for register 0 in the proper mode. FP means this is a
11089 floating point compare: I don't think that it is needed on the arm. */
11091 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
)
11093 enum machine_mode mode
;
11095 int dimode_comparison
= GET_MODE (x
) == DImode
|| GET_MODE (y
) == DImode
;
11097 /* We might have X as a constant, Y as a register because of the predicates
11098 used for cmpdi. If so, force X to a register here. */
11099 if (dimode_comparison
&& !REG_P (x
))
11100 x
= force_reg (DImode
, x
);
11102 mode
= SELECT_CC_MODE (code
, x
, y
);
11103 cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
11105 if (dimode_comparison
11106 && !(TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
11107 && mode
!= CC_CZmode
)
11111 /* To compare two non-zero values for equality, XOR them and
11112 then compare against zero. Not used for ARM mode; there
11113 CC_CZmode is cheaper. */
11114 if (mode
== CC_Zmode
&& y
!= const0_rtx
)
11116 x
= expand_binop (DImode
, xor_optab
, x
, y
, NULL_RTX
, 0, OPTAB_WIDEN
);
11119 /* A scratch register is required. */
11120 clobber
= gen_rtx_CLOBBER (VOIDmode
, gen_rtx_SCRATCH (SImode
));
11121 set
= gen_rtx_SET (VOIDmode
, cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
11122 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, set
, clobber
)));
11125 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
11130 /* Generate a sequence of insns that will generate the correct return
11131 address mask depending on the physical architecture that the program
11134 arm_gen_return_addr_mask (void)
11136 rtx reg
= gen_reg_rtx (Pmode
);
11138 emit_insn (gen_return_addr_mask (reg
));
11143 arm_reload_in_hi (rtx
*operands
)
11145 rtx ref
= operands
[1];
11147 HOST_WIDE_INT offset
= 0;
11149 if (GET_CODE (ref
) == SUBREG
)
11151 offset
= SUBREG_BYTE (ref
);
11152 ref
= SUBREG_REG (ref
);
11155 if (GET_CODE (ref
) == REG
)
11157 /* We have a pseudo which has been spilt onto the stack; there
11158 are two cases here: the first where there is a simple
11159 stack-slot replacement and a second where the stack-slot is
11160 out of range, or is used as a subreg. */
11161 if (reg_equiv_mem (REGNO (ref
)))
11163 ref
= reg_equiv_mem (REGNO (ref
));
11164 base
= find_replacement (&XEXP (ref
, 0));
11167 /* The slot is out of range, or was dressed up in a SUBREG. */
11168 base
= reg_equiv_address (REGNO (ref
));
11171 base
= find_replacement (&XEXP (ref
, 0));
11173 /* Handle the case where the address is too complex to be offset by 1. */
11174 if (GET_CODE (base
) == MINUS
11175 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
11177 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11179 emit_set_insn (base_plus
, base
);
11182 else if (GET_CODE (base
) == PLUS
)
11184 /* The addend must be CONST_INT, or we would have dealt with it above. */
11185 HOST_WIDE_INT hi
, lo
;
11187 offset
+= INTVAL (XEXP (base
, 1));
11188 base
= XEXP (base
, 0);
11190 /* Rework the address into a legal sequence of insns. */
11191 /* Valid range for lo is -4095 -> 4095 */
11194 : -((-offset
) & 0xfff));
11196 /* Corner case, if lo is the max offset then we would be out of range
11197 once we have added the additional 1 below, so bump the msb into the
11198 pre-loading insn(s). */
11202 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
11203 ^ (HOST_WIDE_INT
) 0x80000000)
11204 - (HOST_WIDE_INT
) 0x80000000);
11206 gcc_assert (hi
+ lo
== offset
);
11210 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11212 /* Get the base address; addsi3 knows how to handle constants
11213 that require more than one insn. */
11214 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
11220 /* Operands[2] may overlap operands[0] (though it won't overlap
11221 operands[1]), that's why we asked for a DImode reg -- so we can
11222 use the bit that does not overlap. */
11223 if (REGNO (operands
[2]) == REGNO (operands
[0]))
11224 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11226 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
11228 emit_insn (gen_zero_extendqisi2 (scratch
,
11229 gen_rtx_MEM (QImode
,
11230 plus_constant (base
,
11232 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11233 gen_rtx_MEM (QImode
,
11234 plus_constant (base
,
11236 if (!BYTES_BIG_ENDIAN
)
11237 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11238 gen_rtx_IOR (SImode
,
11241 gen_rtx_SUBREG (SImode
, operands
[0], 0),
11245 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
11246 gen_rtx_IOR (SImode
,
11247 gen_rtx_ASHIFT (SImode
, scratch
,
11249 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
11252 /* Handle storing a half-word to memory during reload by synthesizing as two
11253 byte stores. Take care not to clobber the input values until after we
11254 have moved them somewhere safe. This code assumes that if the DImode
11255 scratch in operands[2] overlaps either the input value or output address
11256 in some way, then that value must die in this insn (we absolutely need
11257 two scratch registers for some corner cases). */
11259 arm_reload_out_hi (rtx
*operands
)
11261 rtx ref
= operands
[0];
11262 rtx outval
= operands
[1];
11264 HOST_WIDE_INT offset
= 0;
11266 if (GET_CODE (ref
) == SUBREG
)
11268 offset
= SUBREG_BYTE (ref
);
11269 ref
= SUBREG_REG (ref
);
11272 if (GET_CODE (ref
) == REG
)
11274 /* We have a pseudo which has been spilt onto the stack; there
11275 are two cases here: the first where there is a simple
11276 stack-slot replacement and a second where the stack-slot is
11277 out of range, or is used as a subreg. */
11278 if (reg_equiv_mem (REGNO (ref
)))
11280 ref
= reg_equiv_mem (REGNO (ref
));
11281 base
= find_replacement (&XEXP (ref
, 0));
11284 /* The slot is out of range, or was dressed up in a SUBREG. */
11285 base
= reg_equiv_address (REGNO (ref
));
11288 base
= find_replacement (&XEXP (ref
, 0));
11290 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
11292 /* Handle the case where the address is too complex to be offset by 1. */
11293 if (GET_CODE (base
) == MINUS
11294 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
11296 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11298 /* Be careful not to destroy OUTVAL. */
11299 if (reg_overlap_mentioned_p (base_plus
, outval
))
11301 /* Updating base_plus might destroy outval, see if we can
11302 swap the scratch and base_plus. */
11303 if (!reg_overlap_mentioned_p (scratch
, outval
))
11306 scratch
= base_plus
;
11311 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
11313 /* Be conservative and copy OUTVAL into the scratch now,
11314 this should only be necessary if outval is a subreg
11315 of something larger than a word. */
11316 /* XXX Might this clobber base? I can't see how it can,
11317 since scratch is known to overlap with OUTVAL, and
11318 must be wider than a word. */
11319 emit_insn (gen_movhi (scratch_hi
, outval
));
11320 outval
= scratch_hi
;
11324 emit_set_insn (base_plus
, base
);
11327 else if (GET_CODE (base
) == PLUS
)
11329 /* The addend must be CONST_INT, or we would have dealt with it above. */
11330 HOST_WIDE_INT hi
, lo
;
11332 offset
+= INTVAL (XEXP (base
, 1));
11333 base
= XEXP (base
, 0);
11335 /* Rework the address into a legal sequence of insns. */
11336 /* Valid range for lo is -4095 -> 4095 */
11339 : -((-offset
) & 0xfff));
11341 /* Corner case, if lo is the max offset then we would be out of range
11342 once we have added the additional 1 below, so bump the msb into the
11343 pre-loading insn(s). */
11347 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
11348 ^ (HOST_WIDE_INT
) 0x80000000)
11349 - (HOST_WIDE_INT
) 0x80000000);
11351 gcc_assert (hi
+ lo
== offset
);
11355 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
11357 /* Be careful not to destroy OUTVAL. */
11358 if (reg_overlap_mentioned_p (base_plus
, outval
))
11360 /* Updating base_plus might destroy outval, see if we
11361 can swap the scratch and base_plus. */
11362 if (!reg_overlap_mentioned_p (scratch
, outval
))
11365 scratch
= base_plus
;
11370 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
11372 /* Be conservative and copy outval into scratch now,
11373 this should only be necessary if outval is a
11374 subreg of something larger than a word. */
11375 /* XXX Might this clobber base? I can't see how it
11376 can, since scratch is known to overlap with
11378 emit_insn (gen_movhi (scratch_hi
, outval
));
11379 outval
= scratch_hi
;
11383 /* Get the base address; addsi3 knows how to handle constants
11384 that require more than one insn. */
11385 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
11391 if (BYTES_BIG_ENDIAN
)
11393 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
11394 plus_constant (base
, offset
+ 1)),
11395 gen_lowpart (QImode
, outval
)));
11396 emit_insn (gen_lshrsi3 (scratch
,
11397 gen_rtx_SUBREG (SImode
, outval
, 0),
11399 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
11400 gen_lowpart (QImode
, scratch
)));
11404 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
11405 gen_lowpart (QImode
, outval
)));
11406 emit_insn (gen_lshrsi3 (scratch
,
11407 gen_rtx_SUBREG (SImode
, outval
, 0),
11409 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
11410 plus_constant (base
, offset
+ 1)),
11411 gen_lowpart (QImode
, scratch
)));
11415 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
11416 (padded to the size of a word) should be passed in a register. */
11419 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
11421 if (TARGET_AAPCS_BASED
)
11422 return must_pass_in_stack_var_size (mode
, type
);
11424 return must_pass_in_stack_var_size_or_pad (mode
, type
);
11428 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
11429 Return true if an argument passed on the stack should be padded upwards,
11430 i.e. if the least-significant byte has useful data.
11431 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
11432 aggregate types are placed in the lowest memory address. */
11435 arm_pad_arg_upward (enum machine_mode mode
, const_tree type
)
11437 if (!TARGET_AAPCS_BASED
)
11438 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
11440 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
11443 /* Half-float values are only passed to libcalls, not regular functions.
11444 They should be passed and returned as "short"s (see RTABI). To achieve
11445 that effect in big-endian mode, pad downwards so the value is passed in
11446 the least-significant end of the register. ??? This needs to be here
11447 rather than in arm_pad_reg_upward due to peculiarity in the handling of
11448 libcall arguments. */
11449 if (BYTES_BIG_ENDIAN
&& mode
== HFmode
)
11456 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
11457 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
11458 byte of the register has useful data, and return the opposite if the
11459 most significant byte does.
11460 For AAPCS, small aggregates and small complex types are always padded
11464 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
,
11465 tree type
, int first ATTRIBUTE_UNUSED
)
11467 if (TARGET_AAPCS_BASED
11468 && BYTES_BIG_ENDIAN
11469 && (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
11470 || FIXED_POINT_TYPE_P (type
))
11471 && int_size_in_bytes (type
) <= 4)
11474 /* Otherwise, use default padding. */
11475 return !BYTES_BIG_ENDIAN
;
11479 /* Print a symbolic form of X to the debug file, F. */
11481 arm_print_value (FILE *f
, rtx x
)
11483 switch (GET_CODE (x
))
11486 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
11490 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
11498 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
11500 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
11501 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
11509 fprintf (f
, "\"%s\"", XSTR (x
, 0));
11513 fprintf (f
, "`%s'", XSTR (x
, 0));
11517 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
11521 arm_print_value (f
, XEXP (x
, 0));
11525 arm_print_value (f
, XEXP (x
, 0));
11527 arm_print_value (f
, XEXP (x
, 1));
11535 fprintf (f
, "????");
11540 /* Routines for manipulation of the constant pool. */
11542 /* Arm instructions cannot load a large constant directly into a
11543 register; they have to come from a pc relative load. The constant
11544 must therefore be placed in the addressable range of the pc
11545 relative load. Depending on the precise pc relative load
11546 instruction the range is somewhere between 256 bytes and 4k. This
11547 means that we often have to dump a constant inside a function, and
11548 generate code to branch around it.
11550 It is important to minimize this, since the branches will slow
11551 things down and make the code larger.
11553 Normally we can hide the table after an existing unconditional
11554 branch so that there is no interruption of the flow, but in the
11555 worst case the code looks like this:
11573 We fix this by performing a scan after scheduling, which notices
11574 which instructions need to have their operands fetched from the
11575 constant table and builds the table.
11577 The algorithm starts by building a table of all the constants that
11578 need fixing up and all the natural barriers in the function (places
11579 where a constant table can be dropped without breaking the flow).
11580 For each fixup we note how far the pc-relative replacement will be
11581 able to reach and the offset of the instruction into the function.
11583 Having built the table we then group the fixes together to form
11584 tables that are as large as possible (subject to addressing
11585 constraints) and emit each table of constants after the last
11586 barrier that is within range of all the instructions in the group.
11587 If a group does not contain a barrier, then we forcibly create one
11588 by inserting a jump instruction into the flow. Once the table has
11589 been inserted, the insns are then modified to reference the
11590 relevant entry in the pool.
11592 Possible enhancements to the algorithm (not implemented) are:
11594 1) For some processors and object formats, there may be benefit in
11595 aligning the pools to the start of cache lines; this alignment
11596 would need to be taken into account when calculating addressability
11599 /* These typedefs are located at the start of this file, so that
11600 they can be used in the prototypes there. This comment is to
11601 remind readers of that fact so that the following structures
11602 can be understood more easily.
11604 typedef struct minipool_node Mnode;
11605 typedef struct minipool_fixup Mfix; */
11607 struct minipool_node
11609 /* Doubly linked chain of entries. */
11612 /* The maximum offset into the code that this entry can be placed. While
11613 pushing fixes for forward references, all entries are sorted in order
11614 of increasing max_address. */
11615 HOST_WIDE_INT max_address
;
11616 /* Similarly for an entry inserted for a backwards ref. */
11617 HOST_WIDE_INT min_address
;
11618 /* The number of fixes referencing this entry. This can become zero
11619 if we "unpush" an entry. In this case we ignore the entry when we
11620 come to emit the code. */
11622 /* The offset from the start of the minipool. */
11623 HOST_WIDE_INT offset
;
11624 /* The value in table. */
11626 /* The mode of value. */
11627 enum machine_mode mode
;
11628 /* The size of the value. With iWMMXt enabled
11629 sizes > 4 also imply an alignment of 8-bytes. */
11633 struct minipool_fixup
11637 HOST_WIDE_INT address
;
11639 enum machine_mode mode
;
11643 HOST_WIDE_INT forwards
;
11644 HOST_WIDE_INT backwards
;
11647 /* Fixes less than a word need padding out to a word boundary. */
11648 #define MINIPOOL_FIX_SIZE(mode) \
11649 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
11651 static Mnode
* minipool_vector_head
;
11652 static Mnode
* minipool_vector_tail
;
11653 static rtx minipool_vector_label
;
11654 static int minipool_pad
;
11656 /* The linked list of all minipool fixes required for this function. */
11657 Mfix
* minipool_fix_head
;
11658 Mfix
* minipool_fix_tail
;
11659 /* The fix entry for the current minipool, once it has been placed. */
11660 Mfix
* minipool_barrier
;
11662 /* Determines if INSN is the start of a jump table. Returns the end
11663 of the TABLE or NULL_RTX. */
11665 is_jump_table (rtx insn
)
11669 if (jump_to_label_p (insn
)
11670 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
11671 == next_real_insn (insn
))
11673 && GET_CODE (table
) == JUMP_INSN
11674 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
11675 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
11681 #ifndef JUMP_TABLES_IN_TEXT_SECTION
11682 #define JUMP_TABLES_IN_TEXT_SECTION 0
11685 static HOST_WIDE_INT
11686 get_jump_table_size (rtx insn
)
11688 /* ADDR_VECs only take room if read-only data does into the text
11690 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
11692 rtx body
= PATTERN (insn
);
11693 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
11694 HOST_WIDE_INT size
;
11695 HOST_WIDE_INT modesize
;
11697 modesize
= GET_MODE_SIZE (GET_MODE (body
));
11698 size
= modesize
* XVECLEN (body
, elt
);
11702 /* Round up size of TBB table to a halfword boundary. */
11703 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
11706 /* No padding necessary for TBH. */
11709 /* Add two bytes for alignment on Thumb. */
11714 gcc_unreachable ();
11722 /* Move a minipool fix MP from its current location to before MAX_MP.
11723 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
11724 constraints may need updating. */
11726 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
11727 HOST_WIDE_INT max_address
)
11729 /* The code below assumes these are different. */
11730 gcc_assert (mp
!= max_mp
);
11732 if (max_mp
== NULL
)
11734 if (max_address
< mp
->max_address
)
11735 mp
->max_address
= max_address
;
11739 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11740 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11742 mp
->max_address
= max_address
;
11744 /* Unlink MP from its current position. Since max_mp is non-null,
11745 mp->prev must be non-null. */
11746 mp
->prev
->next
= mp
->next
;
11747 if (mp
->next
!= NULL
)
11748 mp
->next
->prev
= mp
->prev
;
11750 minipool_vector_tail
= mp
->prev
;
11752 /* Re-insert it before MAX_MP. */
11754 mp
->prev
= max_mp
->prev
;
11757 if (mp
->prev
!= NULL
)
11758 mp
->prev
->next
= mp
;
11760 minipool_vector_head
= mp
;
11763 /* Save the new entry. */
11766 /* Scan over the preceding entries and adjust their addresses as
11768 while (mp
->prev
!= NULL
11769 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11771 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
11778 /* Add a constant to the minipool for a forward reference. Returns the
11779 node added or NULL if the constant will not fit in this pool. */
11781 add_minipool_forward_ref (Mfix
*fix
)
11783 /* If set, max_mp is the first pool_entry that has a lower
11784 constraint than the one we are trying to add. */
11785 Mnode
* max_mp
= NULL
;
11786 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
11789 /* If the minipool starts before the end of FIX->INSN then this FIX
11790 can not be placed into the current pool. Furthermore, adding the
11791 new constant pool entry may cause the pool to start FIX_SIZE bytes
11793 if (minipool_vector_head
&&
11794 (fix
->address
+ get_attr_length (fix
->insn
)
11795 >= minipool_vector_head
->max_address
- fix
->fix_size
))
11798 /* Scan the pool to see if a constant with the same value has
11799 already been added. While we are doing this, also note the
11800 location where we must insert the constant if it doesn't already
11802 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11804 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11805 && fix
->mode
== mp
->mode
11806 && (GET_CODE (fix
->value
) != CODE_LABEL
11807 || (CODE_LABEL_NUMBER (fix
->value
)
11808 == CODE_LABEL_NUMBER (mp
->value
)))
11809 && rtx_equal_p (fix
->value
, mp
->value
))
11811 /* More than one fix references this entry. */
11813 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
11816 /* Note the insertion point if necessary. */
11818 && mp
->max_address
> max_address
)
11821 /* If we are inserting an 8-bytes aligned quantity and
11822 we have not already found an insertion point, then
11823 make sure that all such 8-byte aligned quantities are
11824 placed at the start of the pool. */
11825 if (ARM_DOUBLEWORD_ALIGN
11827 && fix
->fix_size
>= 8
11828 && mp
->fix_size
< 8)
11831 max_address
= mp
->max_address
;
11835 /* The value is not currently in the minipool, so we need to create
11836 a new entry for it. If MAX_MP is NULL, the entry will be put on
11837 the end of the list since the placement is less constrained than
11838 any existing entry. Otherwise, we insert the new fix before
11839 MAX_MP and, if necessary, adjust the constraints on the other
11842 mp
->fix_size
= fix
->fix_size
;
11843 mp
->mode
= fix
->mode
;
11844 mp
->value
= fix
->value
;
11846 /* Not yet required for a backwards ref. */
11847 mp
->min_address
= -65536;
11849 if (max_mp
== NULL
)
11851 mp
->max_address
= max_address
;
11853 mp
->prev
= minipool_vector_tail
;
11855 if (mp
->prev
== NULL
)
11857 minipool_vector_head
= mp
;
11858 minipool_vector_label
= gen_label_rtx ();
11861 mp
->prev
->next
= mp
;
11863 minipool_vector_tail
= mp
;
11867 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
11868 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
11870 mp
->max_address
= max_address
;
11873 mp
->prev
= max_mp
->prev
;
11875 if (mp
->prev
!= NULL
)
11876 mp
->prev
->next
= mp
;
11878 minipool_vector_head
= mp
;
11881 /* Save the new entry. */
11884 /* Scan over the preceding entries and adjust their addresses as
11886 while (mp
->prev
!= NULL
11887 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
11889 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
11897 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
11898 HOST_WIDE_INT min_address
)
11900 HOST_WIDE_INT offset
;
11902 /* The code below assumes these are different. */
11903 gcc_assert (mp
!= min_mp
);
11905 if (min_mp
== NULL
)
11907 if (min_address
> mp
->min_address
)
11908 mp
->min_address
= min_address
;
11912 /* We will adjust this below if it is too loose. */
11913 mp
->min_address
= min_address
;
11915 /* Unlink MP from its current position. Since min_mp is non-null,
11916 mp->next must be non-null. */
11917 mp
->next
->prev
= mp
->prev
;
11918 if (mp
->prev
!= NULL
)
11919 mp
->prev
->next
= mp
->next
;
11921 minipool_vector_head
= mp
->next
;
11923 /* Reinsert it after MIN_MP. */
11925 mp
->next
= min_mp
->next
;
11927 if (mp
->next
!= NULL
)
11928 mp
->next
->prev
= mp
;
11930 minipool_vector_tail
= mp
;
11936 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11938 mp
->offset
= offset
;
11939 if (mp
->refcount
> 0)
11940 offset
+= mp
->fix_size
;
11942 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11943 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11949 /* Add a constant to the minipool for a backward reference. Returns the
11950 node added or NULL if the constant will not fit in this pool.
11952 Note that the code for insertion for a backwards reference can be
11953 somewhat confusing because the calculated offsets for each fix do
11954 not take into account the size of the pool (which is still under
11957 add_minipool_backward_ref (Mfix
*fix
)
11959 /* If set, min_mp is the last pool_entry that has a lower constraint
11960 than the one we are trying to add. */
11961 Mnode
*min_mp
= NULL
;
11962 /* This can be negative, since it is only a constraint. */
11963 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
11966 /* If we can't reach the current pool from this insn, or if we can't
11967 insert this entry at the end of the pool without pushing other
11968 fixes out of range, then we don't try. This ensures that we
11969 can't fail later on. */
11970 if (min_address
>= minipool_barrier
->address
11971 || (minipool_vector_tail
->min_address
+ fix
->fix_size
11972 >= minipool_barrier
->address
))
11975 /* Scan the pool to see if a constant with the same value has
11976 already been added. While we are doing this, also note the
11977 location where we must insert the constant if it doesn't already
11979 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
11981 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
11982 && fix
->mode
== mp
->mode
11983 && (GET_CODE (fix
->value
) != CODE_LABEL
11984 || (CODE_LABEL_NUMBER (fix
->value
)
11985 == CODE_LABEL_NUMBER (mp
->value
)))
11986 && rtx_equal_p (fix
->value
, mp
->value
)
11987 /* Check that there is enough slack to move this entry to the
11988 end of the table (this is conservative). */
11989 && (mp
->max_address
11990 > (minipool_barrier
->address
11991 + minipool_vector_tail
->offset
11992 + minipool_vector_tail
->fix_size
)))
11995 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
11998 if (min_mp
!= NULL
)
11999 mp
->min_address
+= fix
->fix_size
;
12002 /* Note the insertion point if necessary. */
12003 if (mp
->min_address
< min_address
)
12005 /* For now, we do not allow the insertion of 8-byte alignment
12006 requiring nodes anywhere but at the start of the pool. */
12007 if (ARM_DOUBLEWORD_ALIGN
12008 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
12013 else if (mp
->max_address
12014 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
12016 /* Inserting before this entry would push the fix beyond
12017 its maximum address (which can happen if we have
12018 re-located a forwards fix); force the new fix to come
12020 if (ARM_DOUBLEWORD_ALIGN
12021 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
12026 min_address
= mp
->min_address
+ fix
->fix_size
;
12029 /* Do not insert a non-8-byte aligned quantity before 8-byte
12030 aligned quantities. */
12031 else if (ARM_DOUBLEWORD_ALIGN
12032 && fix
->fix_size
< 8
12033 && mp
->fix_size
>= 8)
12036 min_address
= mp
->min_address
+ fix
->fix_size
;
12041 /* We need to create a new entry. */
12043 mp
->fix_size
= fix
->fix_size
;
12044 mp
->mode
= fix
->mode
;
12045 mp
->value
= fix
->value
;
12047 mp
->max_address
= minipool_barrier
->address
+ 65536;
12049 mp
->min_address
= min_address
;
12051 if (min_mp
== NULL
)
12054 mp
->next
= minipool_vector_head
;
12056 if (mp
->next
== NULL
)
12058 minipool_vector_tail
= mp
;
12059 minipool_vector_label
= gen_label_rtx ();
12062 mp
->next
->prev
= mp
;
12064 minipool_vector_head
= mp
;
12068 mp
->next
= min_mp
->next
;
12072 if (mp
->next
!= NULL
)
12073 mp
->next
->prev
= mp
;
12075 minipool_vector_tail
= mp
;
12078 /* Save the new entry. */
12086 /* Scan over the following entries and adjust their offsets. */
12087 while (mp
->next
!= NULL
)
12089 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
12090 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
12093 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
12095 mp
->next
->offset
= mp
->offset
;
12104 assign_minipool_offsets (Mfix
*barrier
)
12106 HOST_WIDE_INT offset
= 0;
12109 minipool_barrier
= barrier
;
12111 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12113 mp
->offset
= offset
;
12115 if (mp
->refcount
> 0)
12116 offset
+= mp
->fix_size
;
12120 /* Output the literal table */
12122 dump_minipool (rtx scan
)
12128 if (ARM_DOUBLEWORD_ALIGN
)
12129 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
12130 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
12137 fprintf (dump_file
,
12138 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
12139 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
12141 scan
= emit_label_after (gen_label_rtx (), scan
);
12142 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
12143 scan
= emit_label_after (minipool_vector_label
, scan
);
12145 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
12147 if (mp
->refcount
> 0)
12151 fprintf (dump_file
,
12152 ";; Offset %u, min %ld, max %ld ",
12153 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
12154 (unsigned long) mp
->max_address
);
12155 arm_print_value (dump_file
, mp
->value
);
12156 fputc ('\n', dump_file
);
12159 switch (mp
->fix_size
)
12161 #ifdef HAVE_consttable_1
12163 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
12167 #ifdef HAVE_consttable_2
12169 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
12173 #ifdef HAVE_consttable_4
12175 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
12179 #ifdef HAVE_consttable_8
12181 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
12185 #ifdef HAVE_consttable_16
12187 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
12192 gcc_unreachable ();
12200 minipool_vector_head
= minipool_vector_tail
= NULL
;
12201 scan
= emit_insn_after (gen_consttable_end (), scan
);
12202 scan
= emit_barrier_after (scan
);
12205 /* Return the cost of forcibly inserting a barrier after INSN. */
12207 arm_barrier_cost (rtx insn
)
12209 /* Basing the location of the pool on the loop depth is preferable,
12210 but at the moment, the basic block information seems to be
12211 corrupt by this stage of the compilation. */
12212 int base_cost
= 50;
12213 rtx next
= next_nonnote_insn (insn
);
12215 if (next
!= NULL
&& GET_CODE (next
) == CODE_LABEL
)
12218 switch (GET_CODE (insn
))
12221 /* It will always be better to place the table before the label, rather
12230 return base_cost
- 10;
12233 return base_cost
+ 10;
12237 /* Find the best place in the insn stream in the range
12238 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
12239 Create the barrier by inserting a jump and add a new fix entry for
12242 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
12244 HOST_WIDE_INT count
= 0;
12246 rtx from
= fix
->insn
;
12247 /* The instruction after which we will insert the jump. */
12248 rtx selected
= NULL
;
12250 /* The address at which the jump instruction will be placed. */
12251 HOST_WIDE_INT selected_address
;
12253 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
12254 rtx label
= gen_label_rtx ();
12256 selected_cost
= arm_barrier_cost (from
);
12257 selected_address
= fix
->address
;
12259 while (from
&& count
< max_count
)
12264 /* This code shouldn't have been called if there was a natural barrier
12266 gcc_assert (GET_CODE (from
) != BARRIER
);
12268 /* Count the length of this insn. */
12269 count
+= get_attr_length (from
);
12271 /* If there is a jump table, add its length. */
12272 tmp
= is_jump_table (from
);
12275 count
+= get_jump_table_size (tmp
);
12277 /* Jump tables aren't in a basic block, so base the cost on
12278 the dispatch insn. If we select this location, we will
12279 still put the pool after the table. */
12280 new_cost
= arm_barrier_cost (from
);
12282 if (count
< max_count
12283 && (!selected
|| new_cost
<= selected_cost
))
12286 selected_cost
= new_cost
;
12287 selected_address
= fix
->address
+ count
;
12290 /* Continue after the dispatch table. */
12291 from
= NEXT_INSN (tmp
);
12295 new_cost
= arm_barrier_cost (from
);
12297 if (count
< max_count
12298 && (!selected
|| new_cost
<= selected_cost
))
12301 selected_cost
= new_cost
;
12302 selected_address
= fix
->address
+ count
;
12305 from
= NEXT_INSN (from
);
12308 /* Make sure that we found a place to insert the jump. */
12309 gcc_assert (selected
);
12311 /* Make sure we do not split a call and its corresponding
12312 CALL_ARG_LOCATION note. */
12313 if (CALL_P (selected
))
12315 rtx next
= NEXT_INSN (selected
);
12316 if (next
&& NOTE_P (next
)
12317 && NOTE_KIND (next
) == NOTE_INSN_CALL_ARG_LOCATION
)
12321 /* Create a new JUMP_INSN that branches around a barrier. */
12322 from
= emit_jump_insn_after (gen_jump (label
), selected
);
12323 JUMP_LABEL (from
) = label
;
12324 barrier
= emit_barrier_after (from
);
12325 emit_label_after (label
, barrier
);
12327 /* Create a minipool barrier entry for the new barrier. */
12328 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
12329 new_fix
->insn
= barrier
;
12330 new_fix
->address
= selected_address
;
12331 new_fix
->next
= fix
->next
;
12332 fix
->next
= new_fix
;
12337 /* Record that there is a natural barrier in the insn stream at
12340 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
12342 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
12345 fix
->address
= address
;
12348 if (minipool_fix_head
!= NULL
)
12349 minipool_fix_tail
->next
= fix
;
12351 minipool_fix_head
= fix
;
12353 minipool_fix_tail
= fix
;
12356 /* Record INSN, which will need fixing up to load a value from the
12357 minipool. ADDRESS is the offset of the insn since the start of the
12358 function; LOC is a pointer to the part of the insn which requires
12359 fixing; VALUE is the constant that must be loaded, which is of type
12362 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
12363 enum machine_mode mode
, rtx value
)
12365 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
12368 fix
->address
= address
;
12371 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
12372 fix
->value
= value
;
12373 fix
->forwards
= get_attr_pool_range (insn
);
12374 fix
->backwards
= get_attr_neg_pool_range (insn
);
12375 fix
->minipool
= NULL
;
12377 /* If an insn doesn't have a range defined for it, then it isn't
12378 expecting to be reworked by this code. Better to stop now than
12379 to generate duff assembly code. */
12380 gcc_assert (fix
->forwards
|| fix
->backwards
);
12382 /* If an entry requires 8-byte alignment then assume all constant pools
12383 require 4 bytes of padding. Trying to do this later on a per-pool
12384 basis is awkward because existing pool entries have to be modified. */
12385 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
12390 fprintf (dump_file
,
12391 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
12392 GET_MODE_NAME (mode
),
12393 INSN_UID (insn
), (unsigned long) address
,
12394 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
12395 arm_print_value (dump_file
, fix
->value
);
12396 fprintf (dump_file
, "\n");
12399 /* Add it to the chain of fixes. */
12402 if (minipool_fix_head
!= NULL
)
12403 minipool_fix_tail
->next
= fix
;
12405 minipool_fix_head
= fix
;
12407 minipool_fix_tail
= fix
;
12410 /* Return the cost of synthesizing a 64-bit constant VAL inline.
12411 Returns the number of insns needed, or 99 if we don't know how to
12414 arm_const_double_inline_cost (rtx val
)
12416 rtx lowpart
, highpart
;
12417 enum machine_mode mode
;
12419 mode
= GET_MODE (val
);
12421 if (mode
== VOIDmode
)
12424 gcc_assert (GET_MODE_SIZE (mode
) == 8);
12426 lowpart
= gen_lowpart (SImode
, val
);
12427 highpart
= gen_highpart_mode (SImode
, mode
, val
);
12429 gcc_assert (GET_CODE (lowpart
) == CONST_INT
);
12430 gcc_assert (GET_CODE (highpart
) == CONST_INT
);
12432 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
12433 NULL_RTX
, NULL_RTX
, 0, 0)
12434 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
12435 NULL_RTX
, NULL_RTX
, 0, 0));
12438 /* Return true if it is worthwhile to split a 64-bit constant into two
12439 32-bit operations. This is the case if optimizing for size, or
12440 if we have load delay slots, or if one 32-bit part can be done with
12441 a single data operation. */
12443 arm_const_double_by_parts (rtx val
)
12445 enum machine_mode mode
= GET_MODE (val
);
12448 if (optimize_size
|| arm_ld_sched
)
12451 if (mode
== VOIDmode
)
12454 part
= gen_highpart_mode (SImode
, mode
, val
);
12456 gcc_assert (GET_CODE (part
) == CONST_INT
);
12458 if (const_ok_for_arm (INTVAL (part
))
12459 || const_ok_for_arm (~INTVAL (part
)))
12462 part
= gen_lowpart (SImode
, val
);
12464 gcc_assert (GET_CODE (part
) == CONST_INT
);
12466 if (const_ok_for_arm (INTVAL (part
))
12467 || const_ok_for_arm (~INTVAL (part
)))
12473 /* Return true if it is possible to inline both the high and low parts
12474 of a 64-bit constant into 32-bit data processing instructions. */
12476 arm_const_double_by_immediates (rtx val
)
12478 enum machine_mode mode
= GET_MODE (val
);
12481 if (mode
== VOIDmode
)
12484 part
= gen_highpart_mode (SImode
, mode
, val
);
12486 gcc_assert (GET_CODE (part
) == CONST_INT
);
12488 if (!const_ok_for_arm (INTVAL (part
)))
12491 part
= gen_lowpart (SImode
, val
);
12493 gcc_assert (GET_CODE (part
) == CONST_INT
);
12495 if (!const_ok_for_arm (INTVAL (part
)))
12501 /* Scan INSN and note any of its operands that need fixing.
12502 If DO_PUSHES is false we do not actually push any of the fixups
12503 needed. The function returns TRUE if any fixups were needed/pushed.
12504 This is used by arm_memory_load_p() which needs to know about loads
12505 of constants that will be converted into minipool loads. */
12507 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
12509 bool result
= false;
12512 extract_insn (insn
);
12514 if (!constrain_operands (1))
12515 fatal_insn_not_found (insn
);
12517 if (recog_data
.n_alternatives
== 0)
12520 /* Fill in recog_op_alt with information about the constraints of
12522 preprocess_constraints ();
12524 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
12526 /* Things we need to fix can only occur in inputs. */
12527 if (recog_data
.operand_type
[opno
] != OP_IN
)
12530 /* If this alternative is a memory reference, then any mention
12531 of constants in this alternative is really to fool reload
12532 into allowing us to accept one there. We need to fix them up
12533 now so that we output the right code. */
12534 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
12536 rtx op
= recog_data
.operand
[opno
];
12538 if (CONSTANT_P (op
))
12541 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
12542 recog_data
.operand_mode
[opno
], op
);
12545 else if (GET_CODE (op
) == MEM
12546 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
12547 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
12551 rtx cop
= avoid_constant_pool_reference (op
);
12553 /* Casting the address of something to a mode narrower
12554 than a word can cause avoid_constant_pool_reference()
12555 to return the pool reference itself. That's no good to
12556 us here. Lets just hope that we can use the
12557 constant pool value directly. */
12559 cop
= get_pool_constant (XEXP (op
, 0));
12561 push_minipool_fix (insn
, address
,
12562 recog_data
.operand_loc
[opno
],
12563 recog_data
.operand_mode
[opno
], cop
);
12574 /* Convert instructions to their cc-clobbering variant if possible, since
12575 that allows us to use smaller encodings. */
12578 thumb2_reorg (void)
12583 INIT_REG_SET (&live
);
12585 /* We are freeing block_for_insn in the toplev to keep compatibility
12586 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12587 compute_bb_for_insn ();
12594 COPY_REG_SET (&live
, DF_LR_OUT (bb
));
12595 df_simulate_initialize_backwards (bb
, &live
);
12596 FOR_BB_INSNS_REVERSE (bb
, insn
)
12598 if (NONJUMP_INSN_P (insn
)
12599 && !REGNO_REG_SET_P (&live
, CC_REGNUM
))
12601 rtx pat
= PATTERN (insn
);
12602 if (GET_CODE (pat
) == SET
12603 && low_register_operand (XEXP (pat
, 0), SImode
)
12604 && thumb_16bit_operator (XEXP (pat
, 1), SImode
)
12605 && low_register_operand (XEXP (XEXP (pat
, 1), 0), SImode
)
12606 && low_register_operand (XEXP (XEXP (pat
, 1), 1), SImode
))
12608 rtx dst
= XEXP (pat
, 0);
12609 rtx src
= XEXP (pat
, 1);
12610 rtx op0
= XEXP (src
, 0);
12611 rtx op1
= (GET_RTX_CLASS (GET_CODE (src
)) == RTX_COMM_ARITH
12612 ? XEXP (src
, 1) : NULL
);
12614 if (rtx_equal_p (dst
, op0
)
12615 || GET_CODE (src
) == PLUS
|| GET_CODE (src
) == MINUS
)
12617 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12618 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
12619 rtvec vec
= gen_rtvec (2, pat
, clobber
);
12621 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
12622 INSN_CODE (insn
) = -1;
12624 /* We can also handle a commutative operation where the
12625 second operand matches the destination. */
12626 else if (op1
&& rtx_equal_p (dst
, op1
))
12628 rtx ccreg
= gen_rtx_REG (CCmode
, CC_REGNUM
);
12629 rtx clobber
= gen_rtx_CLOBBER (VOIDmode
, ccreg
);
12632 src
= copy_rtx (src
);
12633 XEXP (src
, 0) = op1
;
12634 XEXP (src
, 1) = op0
;
12635 pat
= gen_rtx_SET (VOIDmode
, dst
, src
);
12636 vec
= gen_rtvec (2, pat
, clobber
);
12637 PATTERN (insn
) = gen_rtx_PARALLEL (VOIDmode
, vec
);
12638 INSN_CODE (insn
) = -1;
12643 if (NONDEBUG_INSN_P (insn
))
12644 df_simulate_one_insn_backwards (bb
, insn
, &live
);
12648 CLEAR_REG_SET (&live
);
12651 /* Gcc puts the pool in the wrong place for ARM, since we can only
12652 load addresses a limited distance around the pc. We do some
12653 special munging to move the constant pool values to the correct
12654 point in the code. */
12659 HOST_WIDE_INT address
= 0;
12665 minipool_fix_head
= minipool_fix_tail
= NULL
;
12667 /* The first insn must always be a note, or the code below won't
12668 scan it properly. */
12669 insn
= get_insns ();
12670 gcc_assert (GET_CODE (insn
) == NOTE
);
12673 /* Scan all the insns and record the operands that will need fixing. */
12674 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
12676 if (TARGET_CIRRUS_FIX_INVALID_INSNS
12677 && (arm_cirrus_insn_p (insn
)
12678 || GET_CODE (insn
) == JUMP_INSN
12679 || arm_memory_load_p (insn
)))
12680 cirrus_reorg (insn
);
12682 if (GET_CODE (insn
) == BARRIER
)
12683 push_minipool_barrier (insn
, address
);
12684 else if (INSN_P (insn
))
12688 note_invalid_constants (insn
, address
, true);
12689 address
+= get_attr_length (insn
);
12691 /* If the insn is a vector jump, add the size of the table
12692 and skip the table. */
12693 if ((table
= is_jump_table (insn
)) != NULL
)
12695 address
+= get_jump_table_size (table
);
12701 fix
= minipool_fix_head
;
12703 /* Now scan the fixups and perform the required changes. */
12708 Mfix
* last_added_fix
;
12709 Mfix
* last_barrier
= NULL
;
12712 /* Skip any further barriers before the next fix. */
12713 while (fix
&& GET_CODE (fix
->insn
) == BARRIER
)
12716 /* No more fixes. */
12720 last_added_fix
= NULL
;
12722 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
12724 if (GET_CODE (ftmp
->insn
) == BARRIER
)
12726 if (ftmp
->address
>= minipool_vector_head
->max_address
)
12729 last_barrier
= ftmp
;
12731 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
12734 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
12737 /* If we found a barrier, drop back to that; any fixes that we
12738 could have reached but come after the barrier will now go in
12739 the next mini-pool. */
12740 if (last_barrier
!= NULL
)
12742 /* Reduce the refcount for those fixes that won't go into this
12744 for (fdel
= last_barrier
->next
;
12745 fdel
&& fdel
!= ftmp
;
12748 fdel
->minipool
->refcount
--;
12749 fdel
->minipool
= NULL
;
12752 ftmp
= last_barrier
;
12756 /* ftmp is first fix that we can't fit into this pool and
12757 there no natural barriers that we could use. Insert a
12758 new barrier in the code somewhere between the previous
12759 fix and this one, and arrange to jump around it. */
12760 HOST_WIDE_INT max_address
;
12762 /* The last item on the list of fixes must be a barrier, so
12763 we can never run off the end of the list of fixes without
12764 last_barrier being set. */
12767 max_address
= minipool_vector_head
->max_address
;
12768 /* Check that there isn't another fix that is in range that
12769 we couldn't fit into this pool because the pool was
12770 already too large: we need to put the pool before such an
12771 instruction. The pool itself may come just after the
12772 fix because create_fix_barrier also allows space for a
12773 jump instruction. */
12774 if (ftmp
->address
< max_address
)
12775 max_address
= ftmp
->address
+ 1;
12777 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
12780 assign_minipool_offsets (last_barrier
);
12784 if (GET_CODE (ftmp
->insn
) != BARRIER
12785 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
12792 /* Scan over the fixes we have identified for this pool, fixing them
12793 up and adding the constants to the pool itself. */
12794 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
12795 this_fix
= this_fix
->next
)
12796 if (GET_CODE (this_fix
->insn
) != BARRIER
)
12799 = plus_constant (gen_rtx_LABEL_REF (VOIDmode
,
12800 minipool_vector_label
),
12801 this_fix
->minipool
->offset
);
12802 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
12805 dump_minipool (last_barrier
->insn
);
12809 /* From now on we must synthesize any constants that we can't handle
12810 directly. This can happen if the RTL gets split during final
12811 instruction generation. */
12812 after_arm_reorg
= 1;
12814 /* Free the minipool memory. */
12815 obstack_free (&minipool_obstack
, minipool_startobj
);
12818 /* Routines to output assembly language. */
12820 /* If the rtx is the correct value then return the string of the number.
12821 In this way we can ensure that valid double constants are generated even
12822 when cross compiling. */
12824 fp_immediate_constant (rtx x
)
12829 if (!fp_consts_inited
)
12832 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12833 for (i
= 0; i
< 8; i
++)
12834 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
12835 return strings_fp
[i
];
12837 gcc_unreachable ();
12840 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
12841 static const char *
12842 fp_const_from_val (REAL_VALUE_TYPE
*r
)
12846 if (!fp_consts_inited
)
12849 for (i
= 0; i
< 8; i
++)
12850 if (REAL_VALUES_EQUAL (*r
, values_fp
[i
]))
12851 return strings_fp
[i
];
12853 gcc_unreachable ();
12856 /* Output the operands of a LDM/STM instruction to STREAM.
12857 MASK is the ARM register set mask of which only bits 0-15 are important.
12858 REG is the base register, either the frame pointer or the stack pointer,
12859 INSTR is the possibly suffixed load or store instruction.
12860 RFE is nonzero if the instruction should also copy spsr to cpsr. */
12863 print_multi_reg (FILE *stream
, const char *instr
, unsigned reg
,
12864 unsigned long mask
, int rfe
)
12867 bool not_first
= FALSE
;
12869 gcc_assert (!rfe
|| (mask
& (1 << PC_REGNUM
)));
12870 fputc ('\t', stream
);
12871 asm_fprintf (stream
, instr
, reg
);
12872 fputc ('{', stream
);
12874 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
12875 if (mask
& (1 << i
))
12878 fprintf (stream
, ", ");
12880 asm_fprintf (stream
, "%r", i
);
12885 fprintf (stream
, "}^\n");
12887 fprintf (stream
, "}\n");
12891 /* Output a FLDMD instruction to STREAM.
12892 BASE if the register containing the address.
12893 REG and COUNT specify the register range.
12894 Extra registers may be added to avoid hardware bugs.
12896 We output FLDMD even for ARMv5 VFP implementations. Although
12897 FLDMD is technically not supported until ARMv6, it is believed
12898 that all VFP implementations support its use in this context. */
12901 vfp_output_fldmd (FILE * stream
, unsigned int base
, int reg
, int count
)
12905 /* Workaround ARM10 VFPr1 bug. */
12906 if (count
== 2 && !arm_arch6
)
12913 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
12914 load into multiple parts if we have to handle more than 16 registers. */
12917 vfp_output_fldmd (stream
, base
, reg
, 16);
12918 vfp_output_fldmd (stream
, base
, reg
+ 16, count
- 16);
12922 fputc ('\t', stream
);
12923 asm_fprintf (stream
, "fldmfdd\t%r!, {", base
);
12925 for (i
= reg
; i
< reg
+ count
; i
++)
12928 fputs (", ", stream
);
12929 asm_fprintf (stream
, "d%d", i
);
12931 fputs ("}\n", stream
);
12936 /* Output the assembly for a store multiple. */
12939 vfp_output_fstmd (rtx
* operands
)
12946 strcpy (pattern
, "fstmfdd\t%m0!, {%P1");
12947 p
= strlen (pattern
);
12949 gcc_assert (GET_CODE (operands
[1]) == REG
);
12951 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
12952 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
12954 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
12956 strcpy (&pattern
[p
], "}");
12958 output_asm_insn (pattern
, operands
);
12963 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
12964 number of bytes pushed. */
12967 vfp_emit_fstmd (int base_reg
, int count
)
12974 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
12975 register pairs are stored by a store multiple insn. We avoid this
12976 by pushing an extra pair. */
12977 if (count
== 2 && !arm_arch6
)
12979 if (base_reg
== LAST_VFP_REGNUM
- 3)
12984 /* FSTMD may not store more than 16 doubleword registers at once. Split
12985 larger stores into multiple parts (up to a maximum of two, in
12990 /* NOTE: base_reg is an internal register number, so each D register
12992 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
12993 saved
+= vfp_emit_fstmd (base_reg
, 16);
12997 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
12998 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
13000 reg
= gen_rtx_REG (DFmode
, base_reg
);
13003 XVECEXP (par
, 0, 0)
13004 = gen_rtx_SET (VOIDmode
,
13007 gen_rtx_PRE_MODIFY (Pmode
,
13010 (stack_pointer_rtx
,
13013 gen_rtx_UNSPEC (BLKmode
,
13014 gen_rtvec (1, reg
),
13015 UNSPEC_PUSH_MULT
));
13017 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
13018 plus_constant (stack_pointer_rtx
, -(count
* 8)));
13019 RTX_FRAME_RELATED_P (tmp
) = 1;
13020 XVECEXP (dwarf
, 0, 0) = tmp
;
13022 tmp
= gen_rtx_SET (VOIDmode
,
13023 gen_frame_mem (DFmode
, stack_pointer_rtx
),
13025 RTX_FRAME_RELATED_P (tmp
) = 1;
13026 XVECEXP (dwarf
, 0, 1) = tmp
;
13028 for (i
= 1; i
< count
; i
++)
13030 reg
= gen_rtx_REG (DFmode
, base_reg
);
13032 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
13034 tmp
= gen_rtx_SET (VOIDmode
,
13035 gen_frame_mem (DFmode
,
13036 plus_constant (stack_pointer_rtx
,
13039 RTX_FRAME_RELATED_P (tmp
) = 1;
13040 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
13043 par
= emit_insn (par
);
13044 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
13045 RTX_FRAME_RELATED_P (par
) = 1;
13050 /* Emit a call instruction with pattern PAT. ADDR is the address of
13051 the call target. */
13054 arm_emit_call_insn (rtx pat
, rtx addr
)
13058 insn
= emit_call_insn (pat
);
13060 /* The PIC register is live on entry to VxWorks PIC PLT entries.
13061 If the call might use such an entry, add a use of the PIC register
13062 to the instruction's CALL_INSN_FUNCTION_USAGE. */
13063 if (TARGET_VXWORKS_RTP
13065 && GET_CODE (addr
) == SYMBOL_REF
13066 && (SYMBOL_REF_DECL (addr
)
13067 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
13068 : !SYMBOL_REF_LOCAL_P (addr
)))
13070 require_pic_register ();
13071 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
13075 /* Output a 'call' insn. */
13077 output_call (rtx
*operands
)
13079 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
13081 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
13082 if (REGNO (operands
[0]) == LR_REGNUM
)
13084 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
13085 output_asm_insn ("mov%?\t%0, %|lr", operands
);
13088 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
13090 if (TARGET_INTERWORK
|| arm_arch4t
)
13091 output_asm_insn ("bx%?\t%0", operands
);
13093 output_asm_insn ("mov%?\t%|pc, %0", operands
);
13098 /* Output a 'call' insn that is a reference in memory. This is
13099 disabled for ARMv5 and we prefer a blx instead because otherwise
13100 there's a significant performance overhead. */
13102 output_call_mem (rtx
*operands
)
13104 gcc_assert (!arm_arch5
);
13105 if (TARGET_INTERWORK
)
13107 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
13108 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
13109 output_asm_insn ("bx%?\t%|ip", operands
);
13111 else if (regno_use_in (LR_REGNUM
, operands
[0]))
13113 /* LR is used in the memory address. We load the address in the
13114 first instruction. It's safe to use IP as the target of the
13115 load since the call will kill it anyway. */
13116 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
13117 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
13119 output_asm_insn ("bx%?\t%|ip", operands
);
13121 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
13125 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
13126 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
13133 /* Output a move from arm registers to an fpa registers.
13134 OPERANDS[0] is an fpa register.
13135 OPERANDS[1] is the first registers of an arm register pair. */
13137 output_mov_long_double_fpa_from_arm (rtx
*operands
)
13139 int arm_reg0
= REGNO (operands
[1]);
13142 gcc_assert (arm_reg0
!= IP_REGNUM
);
13144 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
13145 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
13146 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
13148 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
13149 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands
);
13154 /* Output a move from an fpa register to arm registers.
13155 OPERANDS[0] is the first registers of an arm register pair.
13156 OPERANDS[1] is an fpa register. */
13158 output_mov_long_double_arm_from_fpa (rtx
*operands
)
13160 int arm_reg0
= REGNO (operands
[0]);
13163 gcc_assert (arm_reg0
!= IP_REGNUM
);
13165 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
13166 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
13167 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
13169 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands
);
13170 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
13174 /* Output a move from arm registers to arm registers of a long double
13175 OPERANDS[0] is the destination.
13176 OPERANDS[1] is the source. */
13178 output_mov_long_double_arm_from_arm (rtx
*operands
)
13180 /* We have to be careful here because the two might overlap. */
13181 int dest_start
= REGNO (operands
[0]);
13182 int src_start
= REGNO (operands
[1]);
13186 if (dest_start
< src_start
)
13188 for (i
= 0; i
< 3; i
++)
13190 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
13191 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
13192 output_asm_insn ("mov%?\t%0, %1", ops
);
13197 for (i
= 2; i
>= 0; i
--)
13199 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
13200 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
13201 output_asm_insn ("mov%?\t%0, %1", ops
);
13209 arm_emit_movpair (rtx dest
, rtx src
)
13211 /* If the src is an immediate, simplify it. */
13212 if (CONST_INT_P (src
))
13214 HOST_WIDE_INT val
= INTVAL (src
);
13215 emit_set_insn (dest
, GEN_INT (val
& 0x0000ffff));
13216 if ((val
>> 16) & 0x0000ffff)
13217 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode
, dest
, GEN_INT (16),
13219 GEN_INT ((val
>> 16) & 0x0000ffff));
13222 emit_set_insn (dest
, gen_rtx_HIGH (SImode
, src
));
13223 emit_set_insn (dest
, gen_rtx_LO_SUM (SImode
, dest
, src
));
13226 /* Output a move from arm registers to an fpa registers.
13227 OPERANDS[0] is an fpa register.
13228 OPERANDS[1] is the first registers of an arm register pair. */
13230 output_mov_double_fpa_from_arm (rtx
*operands
)
13232 int arm_reg0
= REGNO (operands
[1]);
13235 gcc_assert (arm_reg0
!= IP_REGNUM
);
13237 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
13238 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
13239 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops
);
13240 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands
);
13244 /* Output a move from an fpa register to arm registers.
13245 OPERANDS[0] is the first registers of an arm register pair.
13246 OPERANDS[1] is an fpa register. */
13248 output_mov_double_arm_from_fpa (rtx
*operands
)
13250 int arm_reg0
= REGNO (operands
[0]);
13253 gcc_assert (arm_reg0
!= IP_REGNUM
);
13255 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
13256 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
13257 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands
);
13258 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops
);
13262 /* Output a move between double words. It must be REG<-MEM
13265 output_move_double (rtx
*operands
)
13267 enum rtx_code code0
= GET_CODE (operands
[0]);
13268 enum rtx_code code1
= GET_CODE (operands
[1]);
13273 unsigned int reg0
= REGNO (operands
[0]);
13275 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
13277 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
13279 switch (GET_CODE (XEXP (operands
[1], 0)))
13283 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
13284 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
13286 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
13290 gcc_assert (TARGET_LDRD
);
13291 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
13296 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
13298 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
13303 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
13305 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
13309 gcc_assert (TARGET_LDRD
);
13310 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
13315 /* Autoicrement addressing modes should never have overlapping
13316 base and destination registers, and overlapping index registers
13317 are already prohibited, so this doesn't need to worry about
13319 otherops
[0] = operands
[0];
13320 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
13321 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
13323 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
13325 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
13327 /* Registers overlap so split out the increment. */
13328 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
13329 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
13333 /* Use a single insn if we can.
13334 FIXME: IWMMXT allows offsets larger than ldrd can
13335 handle, fix these up with a pair of ldr. */
13337 || GET_CODE (otherops
[2]) != CONST_INT
13338 || (INTVAL (otherops
[2]) > -256
13339 && INTVAL (otherops
[2]) < 256))
13340 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
13343 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
13344 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
13350 /* Use a single insn if we can.
13351 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13352 fix these up with a pair of ldr. */
13354 || GET_CODE (otherops
[2]) != CONST_INT
13355 || (INTVAL (otherops
[2]) > -256
13356 && INTVAL (otherops
[2]) < 256))
13357 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
13360 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
13361 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
13368 /* We might be able to use ldrd %0, %1 here. However the range is
13369 different to ldr/adr, and it is broken on some ARMv7-M
13370 implementations. */
13371 /* Use the second register of the pair to avoid problematic
13373 otherops
[1] = operands
[1];
13374 output_asm_insn ("adr%?\t%0, %1", otherops
);
13375 operands
[1] = otherops
[0];
13377 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13379 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
13382 /* ??? This needs checking for thumb2. */
13384 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
13385 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
13387 otherops
[0] = operands
[0];
13388 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
13389 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
13391 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
13393 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13395 switch ((int) INTVAL (otherops
[2]))
13398 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
13403 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
13408 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
13412 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
13413 operands
[1] = otherops
[0];
13415 && (GET_CODE (otherops
[2]) == REG
13417 || (GET_CODE (otherops
[2]) == CONST_INT
13418 && INTVAL (otherops
[2]) > -256
13419 && INTVAL (otherops
[2]) < 256)))
13421 if (reg_overlap_mentioned_p (operands
[0],
13425 /* Swap base and index registers over to
13426 avoid a conflict. */
13428 otherops
[1] = otherops
[2];
13431 /* If both registers conflict, it will usually
13432 have been fixed by a splitter. */
13433 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
13434 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
13436 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13437 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13441 otherops
[0] = operands
[0];
13442 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
13447 if (GET_CODE (otherops
[2]) == CONST_INT
)
13449 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
13450 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
13452 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13455 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13458 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
13461 return "ldr%(d%)\t%0, [%1]";
13463 return "ldm%(ia%)\t%1, %M0";
13467 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
13468 /* Take care of overlapping base/data reg. */
13469 if (reg_mentioned_p (operands
[0], operands
[1]))
13471 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13472 output_asm_insn ("ldr%?\t%0, %1", operands
);
13476 output_asm_insn ("ldr%?\t%0, %1", operands
);
13477 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13484 /* Constraints should ensure this. */
13485 gcc_assert (code0
== MEM
&& code1
== REG
);
13486 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
13488 switch (GET_CODE (XEXP (operands
[0], 0)))
13492 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
13494 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13498 gcc_assert (TARGET_LDRD
);
13499 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
13504 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
13506 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
13511 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
13513 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
13517 gcc_assert (TARGET_LDRD
);
13518 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
13523 otherops
[0] = operands
[1];
13524 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
13525 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
13527 /* IWMMXT allows offsets larger than ldrd can handle,
13528 fix these up with a pair of ldr. */
13530 && GET_CODE (otherops
[2]) == CONST_INT
13531 && (INTVAL(otherops
[2]) <= -256
13532 || INTVAL(otherops
[2]) >= 256))
13534 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13536 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops
);
13537 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
13541 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops
);
13542 output_asm_insn ("str%?\t%0, [%1], %2", otherops
);
13545 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
13546 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
13548 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
13552 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
13553 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13555 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
13558 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
13564 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
13570 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
13575 && (GET_CODE (otherops
[2]) == REG
13577 || (GET_CODE (otherops
[2]) == CONST_INT
13578 && INTVAL (otherops
[2]) > -256
13579 && INTVAL (otherops
[2]) < 256)))
13581 otherops
[0] = operands
[1];
13582 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
13583 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
13589 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
13590 otherops
[1] = operands
[1];
13591 output_asm_insn ("str%?\t%1, %0", operands
);
13592 output_asm_insn ("str%?\t%H1, %0", otherops
);
13599 /* Output a move, load or store for quad-word vectors in ARM registers. Only
13600 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
13603 output_move_quad (rtx
*operands
)
13605 if (REG_P (operands
[0]))
13607 /* Load, or reg->reg move. */
13609 if (MEM_P (operands
[1]))
13611 switch (GET_CODE (XEXP (operands
[1], 0)))
13614 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
13619 output_asm_insn ("adr%?\t%0, %1", operands
);
13620 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
13624 gcc_unreachable ();
13632 gcc_assert (REG_P (operands
[1]));
13634 dest
= REGNO (operands
[0]);
13635 src
= REGNO (operands
[1]);
13637 /* This seems pretty dumb, but hopefully GCC won't try to do it
13640 for (i
= 0; i
< 4; i
++)
13642 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13643 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13644 output_asm_insn ("mov%?\t%0, %1", ops
);
13647 for (i
= 3; i
>= 0; i
--)
13649 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
13650 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
13651 output_asm_insn ("mov%?\t%0, %1", ops
);
13657 gcc_assert (MEM_P (operands
[0]));
13658 gcc_assert (REG_P (operands
[1]));
13659 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
13661 switch (GET_CODE (XEXP (operands
[0], 0)))
13664 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
13668 gcc_unreachable ();
13675 /* Output a VFP load or store instruction. */
13678 output_move_vfp (rtx
*operands
)
13680 rtx reg
, mem
, addr
, ops
[2];
13681 int load
= REG_P (operands
[0]);
13682 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
13683 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
13686 enum machine_mode mode
;
13688 reg
= operands
[!load
];
13689 mem
= operands
[load
];
13691 mode
= GET_MODE (reg
);
13693 gcc_assert (REG_P (reg
));
13694 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
13695 gcc_assert (mode
== SFmode
13699 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
13700 gcc_assert (MEM_P (mem
));
13702 addr
= XEXP (mem
, 0);
13704 switch (GET_CODE (addr
))
13707 templ
= "f%smdb%c%%?\t%%0!, {%%%s1}%s";
13708 ops
[0] = XEXP (addr
, 0);
13713 templ
= "f%smia%c%%?\t%%0!, {%%%s1}%s";
13714 ops
[0] = XEXP (addr
, 0);
13719 templ
= "f%s%c%%?\t%%%s0, %%1%s";
13725 sprintf (buff
, templ
,
13726 load
? "ld" : "st",
13729 integer_p
? "\t%@ int" : "");
13730 output_asm_insn (buff
, ops
);
13735 /* Output a Neon quad-word load or store, or a load or store for
13736 larger structure modes.
13738 WARNING: The ordering of elements is weird in big-endian mode,
13739 because we use VSTM, as required by the EABI. GCC RTL defines
13740 element ordering based on in-memory order. This can be differ
13741 from the architectural ordering of elements within a NEON register.
13742 The intrinsics defined in arm_neon.h use the NEON register element
13743 ordering, not the GCC RTL element ordering.
13745 For example, the in-memory ordering of a big-endian a quadword
13746 vector with 16-bit elements when stored from register pair {d0,d1}
13747 will be (lowest address first, d0[N] is NEON register element N):
13749 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
13751 When necessary, quadword registers (dN, dN+1) are moved to ARM
13752 registers from rN in the order:
13754 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
13756 So that STM/LDM can be used on vectors in ARM registers, and the
13757 same memory layout will result as if VSTM/VLDM were used. */
13760 output_move_neon (rtx
*operands
)
13762 rtx reg
, mem
, addr
, ops
[2];
13763 int regno
, load
= REG_P (operands
[0]);
13766 enum machine_mode mode
;
13768 reg
= operands
[!load
];
13769 mem
= operands
[load
];
13771 mode
= GET_MODE (reg
);
13773 gcc_assert (REG_P (reg
));
13774 regno
= REGNO (reg
);
13775 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
13776 || NEON_REGNO_OK_FOR_QUAD (regno
));
13777 gcc_assert (VALID_NEON_DREG_MODE (mode
)
13778 || VALID_NEON_QREG_MODE (mode
)
13779 || VALID_NEON_STRUCT_MODE (mode
));
13780 gcc_assert (MEM_P (mem
));
13782 addr
= XEXP (mem
, 0);
13784 /* Strip off const from addresses like (const (plus (...))). */
13785 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13786 addr
= XEXP (addr
, 0);
13788 switch (GET_CODE (addr
))
13791 templ
= "v%smia%%?\t%%0!, %%h1";
13792 ops
[0] = XEXP (addr
, 0);
13797 /* FIXME: We should be using vld1/vst1 here in BE mode? */
13798 templ
= "v%smdb%%?\t%%0!, %%h1";
13799 ops
[0] = XEXP (addr
, 0);
13804 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
13805 gcc_unreachable ();
13810 int nregs
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13813 for (i
= 0; i
< nregs
; i
++)
13815 /* We're only using DImode here because it's a convenient size. */
13816 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
13817 ops
[1] = adjust_address (mem
, DImode
, 8 * i
);
13818 if (reg_overlap_mentioned_p (ops
[0], mem
))
13820 gcc_assert (overlap
== -1);
13825 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13826 output_asm_insn (buff
, ops
);
13831 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
13832 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
13833 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
13834 output_asm_insn (buff
, ops
);
13841 templ
= "v%smia%%?\t%%m0, %%h1";
13846 sprintf (buff
, templ
, load
? "ld" : "st");
13847 output_asm_insn (buff
, ops
);
13852 /* Compute and return the length of neon_mov<mode>, where <mode> is
13853 one of VSTRUCT modes: EI, OI, CI or XI. */
13855 arm_attr_length_move_neon (rtx insn
)
13857 rtx reg
, mem
, addr
;
13859 enum machine_mode mode
;
13861 extract_insn_cached (insn
);
13863 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
13865 mode
= GET_MODE (recog_data
.operand
[0]);
13876 gcc_unreachable ();
13880 load
= REG_P (recog_data
.operand
[0]);
13881 reg
= recog_data
.operand
[!load
];
13882 mem
= recog_data
.operand
[load
];
13884 gcc_assert (MEM_P (mem
));
13886 mode
= GET_MODE (reg
);
13887 addr
= XEXP (mem
, 0);
13889 /* Strip off const from addresses like (const (plus (...))). */
13890 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
13891 addr
= XEXP (addr
, 0);
13893 if (GET_CODE (addr
) == LABEL_REF
|| GET_CODE (addr
) == PLUS
)
13895 int insns
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
13902 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13906 arm_address_offset_is_imm (rtx insn
)
13910 extract_insn_cached (insn
);
13912 if (REG_P (recog_data
.operand
[0]))
13915 mem
= recog_data
.operand
[0];
13917 gcc_assert (MEM_P (mem
));
13919 addr
= XEXP (mem
, 0);
13921 if (GET_CODE (addr
) == REG
13922 || (GET_CODE (addr
) == PLUS
13923 && GET_CODE (XEXP (addr
, 0)) == REG
13924 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
))
13930 /* Output an ADD r, s, #n where n may be too big for one instruction.
13931 If adding zero to one register, output nothing. */
13933 output_add_immediate (rtx
*operands
)
13935 HOST_WIDE_INT n
= INTVAL (operands
[2]);
13937 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
13940 output_multi_immediate (operands
,
13941 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
13944 output_multi_immediate (operands
,
13945 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
13952 /* Output a multiple immediate operation.
13953 OPERANDS is the vector of operands referred to in the output patterns.
13954 INSTR1 is the output pattern to use for the first constant.
13955 INSTR2 is the output pattern to use for subsequent constants.
13956 IMMED_OP is the index of the constant slot in OPERANDS.
13957 N is the constant value. */
13958 static const char *
13959 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
13960 int immed_op
, HOST_WIDE_INT n
)
13962 #if HOST_BITS_PER_WIDE_INT > 32
13968 /* Quick and easy output. */
13969 operands
[immed_op
] = const0_rtx
;
13970 output_asm_insn (instr1
, operands
);
13975 const char * instr
= instr1
;
13977 /* Note that n is never zero here (which would give no output). */
13978 for (i
= 0; i
< 32; i
+= 2)
13982 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
13983 output_asm_insn (instr
, operands
);
13993 /* Return the name of a shifter operation. */
13994 static const char *
13995 arm_shift_nmem(enum rtx_code code
)
14000 return ARM_LSL_NAME
;
14016 /* Return the appropriate ARM instruction for the operation code.
14017 The returned result should not be overwritten. OP is the rtx of the
14018 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
14021 arithmetic_instr (rtx op
, int shift_first_arg
)
14023 switch (GET_CODE (op
))
14029 return shift_first_arg
? "rsb" : "sub";
14044 return arm_shift_nmem(GET_CODE(op
));
14047 gcc_unreachable ();
14051 /* Ensure valid constant shifts and return the appropriate shift mnemonic
14052 for the operation code. The returned result should not be overwritten.
14053 OP is the rtx code of the shift.
14054 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
14056 static const char *
14057 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
14060 enum rtx_code code
= GET_CODE (op
);
14062 switch (GET_CODE (XEXP (op
, 1)))
14070 *amountp
= INTVAL (XEXP (op
, 1));
14074 gcc_unreachable ();
14080 gcc_assert (*amountp
!= -1);
14081 *amountp
= 32 - *amountp
;
14084 /* Fall through. */
14090 mnem
= arm_shift_nmem(code
);
14094 /* We never have to worry about the amount being other than a
14095 power of 2, since this case can never be reloaded from a reg. */
14096 gcc_assert (*amountp
!= -1);
14097 *amountp
= int_log2 (*amountp
);
14098 return ARM_LSL_NAME
;
14101 gcc_unreachable ();
14104 if (*amountp
!= -1)
14106 /* This is not 100% correct, but follows from the desire to merge
14107 multiplication by a power of 2 with the recognizer for a
14108 shift. >=32 is not a valid shift for "lsl", so we must try and
14109 output a shift that produces the correct arithmetical result.
14110 Using lsr #32 is identical except for the fact that the carry bit
14111 is not set correctly if we set the flags; but we never use the
14112 carry bit from such an operation, so we can ignore that. */
14113 if (code
== ROTATERT
)
14114 /* Rotate is just modulo 32. */
14116 else if (*amountp
!= (*amountp
& 31))
14118 if (code
== ASHIFT
)
14123 /* Shifts of 0 are no-ops. */
14131 /* Obtain the shift from the POWER of two. */
14133 static HOST_WIDE_INT
14134 int_log2 (HOST_WIDE_INT power
)
14136 HOST_WIDE_INT shift
= 0;
14138 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
14140 gcc_assert (shift
<= 31);
14147 /* Output a .ascii pseudo-op, keeping track of lengths. This is
14148 because /bin/as is horribly restrictive. The judgement about
14149 whether or not each character is 'printable' (and can be output as
14150 is) or not (and must be printed with an octal escape) must be made
14151 with reference to the *host* character set -- the situation is
14152 similar to that discussed in the comments above pp_c_char in
14153 c-pretty-print.c. */
14155 #define MAX_ASCII_LEN 51
14158 output_ascii_pseudo_op (FILE *stream
, const unsigned char *p
, int len
)
14161 int len_so_far
= 0;
14163 fputs ("\t.ascii\t\"", stream
);
14165 for (i
= 0; i
< len
; i
++)
14169 if (len_so_far
>= MAX_ASCII_LEN
)
14171 fputs ("\"\n\t.ascii\t\"", stream
);
14177 if (c
== '\\' || c
== '\"')
14179 putc ('\\', stream
);
14187 fprintf (stream
, "\\%03o", c
);
14192 fputs ("\"\n", stream
);
14195 /* Compute the register save mask for registers 0 through 12
14196 inclusive. This code is used by arm_compute_save_reg_mask. */
14198 static unsigned long
14199 arm_compute_save_reg0_reg12_mask (void)
14201 unsigned long func_type
= arm_current_func_type ();
14202 unsigned long save_reg_mask
= 0;
14205 if (IS_INTERRUPT (func_type
))
14207 unsigned int max_reg
;
14208 /* Interrupt functions must not corrupt any registers,
14209 even call clobbered ones. If this is a leaf function
14210 we can just examine the registers used by the RTL, but
14211 otherwise we have to assume that whatever function is
14212 called might clobber anything, and so we have to save
14213 all the call-clobbered registers as well. */
14214 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
14215 /* FIQ handlers have registers r8 - r12 banked, so
14216 we only need to check r0 - r7, Normal ISRs only
14217 bank r14 and r15, so we must check up to r12.
14218 r13 is the stack pointer which is always preserved,
14219 so we do not need to consider it here. */
14224 for (reg
= 0; reg
<= max_reg
; reg
++)
14225 if (df_regs_ever_live_p (reg
)
14226 || (! current_function_is_leaf
&& call_used_regs
[reg
]))
14227 save_reg_mask
|= (1 << reg
);
14229 /* Also save the pic base register if necessary. */
14231 && !TARGET_SINGLE_PIC_BASE
14232 && arm_pic_register
!= INVALID_REGNUM
14233 && crtl
->uses_pic_offset_table
)
14234 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
14236 else if (IS_VOLATILE(func_type
))
14238 /* For noreturn functions we historically omitted register saves
14239 altogether. However this really messes up debugging. As a
14240 compromise save just the frame pointers. Combined with the link
14241 register saved elsewhere this should be sufficient to get
14243 if (frame_pointer_needed
)
14244 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
14245 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM
))
14246 save_reg_mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
14247 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM
))
14248 save_reg_mask
|= 1 << THUMB_HARD_FRAME_POINTER_REGNUM
;
14252 /* In the normal case we only need to save those registers
14253 which are call saved and which are used by this function. */
14254 for (reg
= 0; reg
<= 11; reg
++)
14255 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
14256 save_reg_mask
|= (1 << reg
);
14258 /* Handle the frame pointer as a special case. */
14259 if (frame_pointer_needed
)
14260 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
14262 /* If we aren't loading the PIC register,
14263 don't stack it even though it may be live. */
14265 && !TARGET_SINGLE_PIC_BASE
14266 && arm_pic_register
!= INVALID_REGNUM
14267 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
14268 || crtl
->uses_pic_offset_table
))
14269 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
14271 /* The prologue will copy SP into R0, so save it. */
14272 if (IS_STACKALIGN (func_type
))
14273 save_reg_mask
|= 1;
14276 /* Save registers so the exception handler can modify them. */
14277 if (crtl
->calls_eh_return
)
14283 reg
= EH_RETURN_DATA_REGNO (i
);
14284 if (reg
== INVALID_REGNUM
)
14286 save_reg_mask
|= 1 << reg
;
14290 return save_reg_mask
;
14294 /* Compute the number of bytes used to store the static chain register on the
14295 stack, above the stack frame. We need to know this accurately to get the
14296 alignment of the rest of the stack frame correct. */
14298 static int arm_compute_static_chain_stack_bytes (void)
14300 unsigned long func_type
= arm_current_func_type ();
14301 int static_chain_stack_bytes
= 0;
14303 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
&&
14304 IS_NESTED (func_type
) &&
14305 df_regs_ever_live_p (3) && crtl
->args
.pretend_args_size
== 0)
14306 static_chain_stack_bytes
= 4;
14308 return static_chain_stack_bytes
;
14312 /* Compute a bit mask of which registers need to be
14313 saved on the stack for the current function.
14314 This is used by arm_get_frame_offsets, which may add extra registers. */
14316 static unsigned long
14317 arm_compute_save_reg_mask (void)
14319 unsigned int save_reg_mask
= 0;
14320 unsigned long func_type
= arm_current_func_type ();
14323 if (IS_NAKED (func_type
))
14324 /* This should never really happen. */
14327 /* If we are creating a stack frame, then we must save the frame pointer,
14328 IP (which will hold the old stack pointer), LR and the PC. */
14329 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
14331 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
14334 | (1 << PC_REGNUM
);
14336 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
14338 /* Decide if we need to save the link register.
14339 Interrupt routines have their own banked link register,
14340 so they never need to save it.
14341 Otherwise if we do not use the link register we do not need to save
14342 it. If we are pushing other registers onto the stack however, we
14343 can save an instruction in the epilogue by pushing the link register
14344 now and then popping it back into the PC. This incurs extra memory
14345 accesses though, so we only do it when optimizing for size, and only
14346 if we know that we will not need a fancy return sequence. */
14347 if (df_regs_ever_live_p (LR_REGNUM
)
14350 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
14351 && !crtl
->calls_eh_return
))
14352 save_reg_mask
|= 1 << LR_REGNUM
;
14354 if (cfun
->machine
->lr_save_eliminated
)
14355 save_reg_mask
&= ~ (1 << LR_REGNUM
);
14357 if (TARGET_REALLY_IWMMXT
14358 && ((bit_count (save_reg_mask
)
14359 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
+
14360 arm_compute_static_chain_stack_bytes())
14363 /* The total number of registers that are going to be pushed
14364 onto the stack is odd. We need to ensure that the stack
14365 is 64-bit aligned before we start to save iWMMXt registers,
14366 and also before we start to create locals. (A local variable
14367 might be a double or long long which we will load/store using
14368 an iWMMXt instruction). Therefore we need to push another
14369 ARM register, so that the stack will be 64-bit aligned. We
14370 try to avoid using the arg registers (r0 -r3) as they might be
14371 used to pass values in a tail call. */
14372 for (reg
= 4; reg
<= 12; reg
++)
14373 if ((save_reg_mask
& (1 << reg
)) == 0)
14377 save_reg_mask
|= (1 << reg
);
14380 cfun
->machine
->sibcall_blocked
= 1;
14381 save_reg_mask
|= (1 << 3);
14385 /* We may need to push an additional register for use initializing the
14386 PIC base register. */
14387 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
14388 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
14390 reg
= thumb_find_work_register (1 << 4);
14391 if (!call_used_regs
[reg
])
14392 save_reg_mask
|= (1 << reg
);
14395 return save_reg_mask
;
14399 /* Compute a bit mask of which registers need to be
14400 saved on the stack for the current function. */
14401 static unsigned long
14402 thumb1_compute_save_reg_mask (void)
14404 unsigned long mask
;
14408 for (reg
= 0; reg
< 12; reg
++)
14409 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14413 && !TARGET_SINGLE_PIC_BASE
14414 && arm_pic_register
!= INVALID_REGNUM
14415 && crtl
->uses_pic_offset_table
)
14416 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
14418 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
14419 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
14420 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
14422 /* LR will also be pushed if any lo regs are pushed. */
14423 if (mask
& 0xff || thumb_force_lr_save ())
14424 mask
|= (1 << LR_REGNUM
);
14426 /* Make sure we have a low work register if we need one.
14427 We will need one if we are going to push a high register,
14428 but we are not currently intending to push a low register. */
14429 if ((mask
& 0xff) == 0
14430 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
14432 /* Use thumb_find_work_register to choose which register
14433 we will use. If the register is live then we will
14434 have to push it. Use LAST_LO_REGNUM as our fallback
14435 choice for the register to select. */
14436 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
14437 /* Make sure the register returned by thumb_find_work_register is
14438 not part of the return value. */
14439 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
14440 reg
= LAST_LO_REGNUM
;
14442 if (! call_used_regs
[reg
])
14446 /* The 504 below is 8 bytes less than 512 because there are two possible
14447 alignment words. We can't tell here if they will be present or not so we
14448 have to play it safe and assume that they are. */
14449 if ((CALLER_INTERWORKING_SLOT_SIZE
+
14450 ROUND_UP_WORD (get_frame_size ()) +
14451 crtl
->outgoing_args_size
) >= 504)
14453 /* This is the same as the code in thumb1_expand_prologue() which
14454 determines which register to use for stack decrement. */
14455 for (reg
= LAST_ARG_REGNUM
+ 1; reg
<= LAST_LO_REGNUM
; reg
++)
14456 if (mask
& (1 << reg
))
14459 if (reg
> LAST_LO_REGNUM
)
14461 /* Make sure we have a register available for stack decrement. */
14462 mask
|= 1 << LAST_LO_REGNUM
;
14470 /* Return the number of bytes required to save VFP registers. */
14472 arm_get_vfp_saved_size (void)
14474 unsigned int regno
;
14479 /* Space for saved VFP registers. */
14480 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14483 for (regno
= FIRST_VFP_REGNUM
;
14484 regno
< LAST_VFP_REGNUM
;
14487 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
14488 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
14492 /* Workaround ARM10 VFPr1 bug. */
14493 if (count
== 2 && !arm_arch6
)
14495 saved
+= count
* 8;
14504 if (count
== 2 && !arm_arch6
)
14506 saved
+= count
* 8;
14513 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
14514 everything bar the final return instruction. */
14516 output_return_instruction (rtx operand
, int really_return
, int reverse
)
14518 char conditional
[10];
14521 unsigned long live_regs_mask
;
14522 unsigned long func_type
;
14523 arm_stack_offsets
*offsets
;
14525 func_type
= arm_current_func_type ();
14527 if (IS_NAKED (func_type
))
14530 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14532 /* If this function was declared non-returning, and we have
14533 found a tail call, then we have to trust that the called
14534 function won't return. */
14539 /* Otherwise, trap an attempted return by aborting. */
14541 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
14543 assemble_external_libcall (ops
[1]);
14544 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
14550 gcc_assert (!cfun
->calls_alloca
|| really_return
);
14552 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
14554 cfun
->machine
->return_used_this_function
= 1;
14556 offsets
= arm_get_frame_offsets ();
14557 live_regs_mask
= offsets
->saved_regs_mask
;
14559 if (live_regs_mask
)
14561 const char * return_reg
;
14563 /* If we do not have any special requirements for function exit
14564 (e.g. interworking) then we can load the return address
14565 directly into the PC. Otherwise we must load it into LR. */
14567 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
14568 return_reg
= reg_names
[PC_REGNUM
];
14570 return_reg
= reg_names
[LR_REGNUM
];
14572 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
14574 /* There are three possible reasons for the IP register
14575 being saved. 1) a stack frame was created, in which case
14576 IP contains the old stack pointer, or 2) an ISR routine
14577 corrupted it, or 3) it was saved to align the stack on
14578 iWMMXt. In case 1, restore IP into SP, otherwise just
14580 if (frame_pointer_needed
)
14582 live_regs_mask
&= ~ (1 << IP_REGNUM
);
14583 live_regs_mask
|= (1 << SP_REGNUM
);
14586 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
14589 /* On some ARM architectures it is faster to use LDR rather than
14590 LDM to load a single register. On other architectures, the
14591 cost is the same. In 26 bit mode, or for exception handlers,
14592 we have to use LDM to load the PC so that the CPSR is also
14594 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14595 if (live_regs_mask
== (1U << reg
))
14598 if (reg
<= LAST_ARM_REGNUM
14599 && (reg
!= LR_REGNUM
14601 || ! IS_INTERRUPT (func_type
)))
14603 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
14604 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
14611 /* Generate the load multiple instruction to restore the
14612 registers. Note we can get here, even if
14613 frame_pointer_needed is true, but only if sp already
14614 points to the base of the saved core registers. */
14615 if (live_regs_mask
& (1 << SP_REGNUM
))
14617 unsigned HOST_WIDE_INT stack_adjust
;
14619 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
14620 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
14622 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
14623 if (TARGET_UNIFIED_ASM
)
14624 sprintf (instr
, "ldmib%s\t%%|sp, {", conditional
);
14626 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
14629 /* If we can't use ldmib (SA110 bug),
14630 then try to pop r3 instead. */
14632 live_regs_mask
|= 1 << 3;
14634 if (TARGET_UNIFIED_ASM
)
14635 sprintf (instr
, "ldmfd%s\t%%|sp, {", conditional
);
14637 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
14641 if (TARGET_UNIFIED_ASM
)
14642 sprintf (instr
, "pop%s\t{", conditional
);
14644 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
14646 p
= instr
+ strlen (instr
);
14648 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
14649 if (live_regs_mask
& (1 << reg
))
14651 int l
= strlen (reg_names
[reg
]);
14657 memcpy (p
, ", ", 2);
14661 memcpy (p
, "%|", 2);
14662 memcpy (p
+ 2, reg_names
[reg
], l
);
14666 if (live_regs_mask
& (1 << LR_REGNUM
))
14668 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
14669 /* If returning from an interrupt, restore the CPSR. */
14670 if (IS_INTERRUPT (func_type
))
14677 output_asm_insn (instr
, & operand
);
14679 /* See if we need to generate an extra instruction to
14680 perform the actual function return. */
14682 && func_type
!= ARM_FT_INTERWORKED
14683 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
14685 /* The return has already been handled
14686 by loading the LR into the PC. */
14693 switch ((int) ARM_FUNC_TYPE (func_type
))
14697 /* ??? This is wrong for unified assembly syntax. */
14698 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
14701 case ARM_FT_INTERWORKED
:
14702 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14705 case ARM_FT_EXCEPTION
:
14706 /* ??? This is wrong for unified assembly syntax. */
14707 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
14711 /* Use bx if it's available. */
14712 if (arm_arch5
|| arm_arch4t
)
14713 sprintf (instr
, "bx%s\t%%|lr", conditional
);
14715 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
14719 output_asm_insn (instr
, & operand
);
14725 /* Write the function name into the code section, directly preceding
14726 the function prologue.
14728 Code will be output similar to this:
14730 .ascii "arm_poke_function_name", 0
14733 .word 0xff000000 + (t1 - t0)
14734 arm_poke_function_name
14736 stmfd sp!, {fp, ip, lr, pc}
14739 When performing a stack backtrace, code can inspect the value
14740 of 'pc' stored at 'fp' + 0. If the trace function then looks
14741 at location pc - 12 and the top 8 bits are set, then we know
14742 that there is a function name embedded immediately preceding this
14743 location and has length ((pc[-3]) & 0xff000000).
14745 We assume that pc is declared as a pointer to an unsigned long.
14747 It is of no benefit to output the function name if we are assembling
14748 a leaf function. These function types will not contain a stack
14749 backtrace structure, therefore it is not possible to determine the
14752 arm_poke_function_name (FILE *stream
, const char *name
)
14754 unsigned long alignlength
;
14755 unsigned long length
;
14758 length
= strlen (name
) + 1;
14759 alignlength
= ROUND_UP_WORD (length
);
14761 ASM_OUTPUT_ASCII (stream
, name
, length
);
14762 ASM_OUTPUT_ALIGN (stream
, 2);
14763 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
14764 assemble_aligned_integer (UNITS_PER_WORD
, x
);
14767 /* Place some comments into the assembler stream
14768 describing the current function. */
14770 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
14772 unsigned long func_type
;
14774 /* ??? Do we want to print some of the below anyway? */
14778 /* Sanity check. */
14779 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
14781 func_type
= arm_current_func_type ();
14783 switch ((int) ARM_FUNC_TYPE (func_type
))
14786 case ARM_FT_NORMAL
:
14788 case ARM_FT_INTERWORKED
:
14789 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
14792 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
14795 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
14797 case ARM_FT_EXCEPTION
:
14798 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
14802 if (IS_NAKED (func_type
))
14803 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
14805 if (IS_VOLATILE (func_type
))
14806 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
14808 if (IS_NESTED (func_type
))
14809 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
14810 if (IS_STACKALIGN (func_type
))
14811 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
14813 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
14815 crtl
->args
.pretend_args_size
, frame_size
);
14817 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
14818 frame_pointer_needed
,
14819 cfun
->machine
->uses_anonymous_args
);
14821 if (cfun
->machine
->lr_save_eliminated
)
14822 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
14824 if (crtl
->calls_eh_return
)
14825 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
14830 arm_output_epilogue (rtx sibling
)
14833 unsigned long saved_regs_mask
;
14834 unsigned long func_type
;
14835 /* Floats_offset is the offset from the "virtual" frame. In an APCS
14836 frame that is $fp + 4 for a non-variadic function. */
14837 int floats_offset
= 0;
14839 FILE * f
= asm_out_file
;
14840 unsigned int lrm_count
= 0;
14841 int really_return
= (sibling
== NULL
);
14843 arm_stack_offsets
*offsets
;
14845 /* If we have already generated the return instruction
14846 then it is futile to generate anything else. */
14847 if (use_return_insn (FALSE
, sibling
) &&
14848 (cfun
->machine
->return_used_this_function
!= 0))
14851 func_type
= arm_current_func_type ();
14853 if (IS_NAKED (func_type
))
14854 /* Naked functions don't have epilogues. */
14857 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
14861 /* A volatile function should never return. Call abort. */
14862 op
= gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)" : "abort");
14863 assemble_external_libcall (op
);
14864 output_asm_insn ("bl\t%a0", &op
);
14869 /* If we are throwing an exception, then we really must be doing a
14870 return, so we can't tail-call. */
14871 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
14873 offsets
= arm_get_frame_offsets ();
14874 saved_regs_mask
= offsets
->saved_regs_mask
;
14877 lrm_count
= bit_count (saved_regs_mask
);
14879 floats_offset
= offsets
->saved_args
;
14880 /* Compute how far away the floats will be. */
14881 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
14882 if (saved_regs_mask
& (1 << reg
))
14883 floats_offset
+= 4;
14885 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
14887 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
14888 int vfp_offset
= offsets
->frame
;
14890 if (TARGET_FPA_EMU2
)
14892 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14893 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14895 floats_offset
+= 12;
14896 asm_fprintf (f
, "\tldfe\t%r, [%r, #-%d]\n",
14897 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14902 start_reg
= LAST_FPA_REGNUM
;
14904 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
14906 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14908 floats_offset
+= 12;
14910 /* We can't unstack more than four registers at once. */
14911 if (start_reg
- reg
== 3)
14913 asm_fprintf (f
, "\tlfm\t%r, 4, [%r, #-%d]\n",
14914 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
14915 start_reg
= reg
- 1;
14920 if (reg
!= start_reg
)
14921 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14922 reg
+ 1, start_reg
- reg
,
14923 FP_REGNUM
, floats_offset
- vfp_offset
);
14924 start_reg
= reg
- 1;
14928 /* Just in case the last register checked also needs unstacking. */
14929 if (reg
!= start_reg
)
14930 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
14931 reg
+ 1, start_reg
- reg
,
14932 FP_REGNUM
, floats_offset
- vfp_offset
);
14935 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
14939 /* The fldmd insns do not have base+offset addressing
14940 modes, so we use IP to hold the address. */
14941 saved_size
= arm_get_vfp_saved_size ();
14943 if (saved_size
> 0)
14945 floats_offset
+= saved_size
;
14946 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", IP_REGNUM
,
14947 FP_REGNUM
, floats_offset
- vfp_offset
);
14949 start_reg
= FIRST_VFP_REGNUM
;
14950 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
14952 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
14953 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
14955 if (start_reg
!= reg
)
14956 vfp_output_fldmd (f
, IP_REGNUM
,
14957 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14958 (reg
- start_reg
) / 2);
14959 start_reg
= reg
+ 2;
14962 if (start_reg
!= reg
)
14963 vfp_output_fldmd (f
, IP_REGNUM
,
14964 (start_reg
- FIRST_VFP_REGNUM
) / 2,
14965 (reg
- start_reg
) / 2);
14970 /* The frame pointer is guaranteed to be non-double-word aligned.
14971 This is because it is set to (old_stack_pointer - 4) and the
14972 old_stack_pointer was double word aligned. Thus the offset to
14973 the iWMMXt registers to be loaded must also be non-double-word
14974 sized, so that the resultant address *is* double-word aligned.
14975 We can ignore floats_offset since that was already included in
14976 the live_regs_mask. */
14977 lrm_count
+= (lrm_count
% 2 ? 2 : 1);
14979 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
14980 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
14982 asm_fprintf (f
, "\twldrd\t%r, [%r, #-%d]\n",
14983 reg
, FP_REGNUM
, lrm_count
* 4);
14988 /* saved_regs_mask should contain the IP, which at the time of stack
14989 frame generation actually contains the old stack pointer. So a
14990 quick way to unwind the stack is just pop the IP register directly
14991 into the stack pointer. */
14992 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
14993 saved_regs_mask
&= ~ (1 << IP_REGNUM
);
14994 saved_regs_mask
|= (1 << SP_REGNUM
);
14996 /* There are two registers left in saved_regs_mask - LR and PC. We
14997 only need to restore the LR register (the return address), but to
14998 save time we can load it directly into the PC, unless we need a
14999 special function exit sequence, or we are not really returning. */
15001 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
15002 && !crtl
->calls_eh_return
)
15003 /* Delete the LR from the register mask, so that the LR on
15004 the stack is loaded into the PC in the register mask. */
15005 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
15007 saved_regs_mask
&= ~ (1 << PC_REGNUM
);
15009 /* We must use SP as the base register, because SP is one of the
15010 registers being restored. If an interrupt or page fault
15011 happens in the ldm instruction, the SP might or might not
15012 have been restored. That would be bad, as then SP will no
15013 longer indicate the safe area of stack, and we can get stack
15014 corruption. Using SP as the base register means that it will
15015 be reset correctly to the original value, should an interrupt
15016 occur. If the stack pointer already points at the right
15017 place, then omit the subtraction. */
15018 if (offsets
->outgoing_args
!= (1 + (int) bit_count (saved_regs_mask
))
15019 || cfun
->calls_alloca
)
15020 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", SP_REGNUM
, FP_REGNUM
,
15021 4 * bit_count (saved_regs_mask
));
15022 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
, 0);
15024 if (IS_INTERRUPT (func_type
))
15025 /* Interrupt handlers will have pushed the
15026 IP onto the stack, so restore it now. */
15027 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, 1 << IP_REGNUM
, 0);
15031 /* This branch is executed for ARM mode (non-apcs frames) and
15032 Thumb-2 mode. Frame layout is essentially the same for those
15033 cases, except that in ARM mode frame pointer points to the
15034 first saved register, while in Thumb-2 mode the frame pointer points
15035 to the last saved register.
15037 It is possible to make frame pointer point to last saved
15038 register in both cases, and remove some conditionals below.
15039 That means that fp setup in prologue would be just "mov fp, sp"
15040 and sp restore in epilogue would be just "mov sp, fp", whereas
15041 now we have to use add/sub in those cases. However, the value
15042 of that would be marginal, as both mov and add/sub are 32-bit
15043 in ARM mode, and it would require extra conditionals
15044 in arm_expand_prologue to distingish ARM-apcs-frame case
15045 (where frame pointer is required to point at first register)
15046 and ARM-non-apcs-frame. Therefore, such change is postponed
15047 until real need arise. */
15048 unsigned HOST_WIDE_INT amount
;
15050 /* Restore stack pointer if necessary. */
15051 if (TARGET_ARM
&& frame_pointer_needed
)
15053 operands
[0] = stack_pointer_rtx
;
15054 operands
[1] = hard_frame_pointer_rtx
;
15056 operands
[2] = GEN_INT (offsets
->frame
- offsets
->saved_regs
);
15057 output_add_immediate (operands
);
15061 if (frame_pointer_needed
)
15063 /* For Thumb-2 restore sp from the frame pointer.
15064 Operand restrictions mean we have to incrememnt FP, then copy
15066 amount
= offsets
->locals_base
- offsets
->saved_regs
;
15067 operands
[0] = hard_frame_pointer_rtx
;
15071 unsigned long count
;
15072 operands
[0] = stack_pointer_rtx
;
15073 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
15074 /* pop call clobbered registers if it avoids a
15075 separate stack adjustment. */
15076 count
= offsets
->saved_regs
- offsets
->saved_args
;
15079 && !crtl
->calls_eh_return
15080 && bit_count(saved_regs_mask
) * 4 == count
15081 && !IS_INTERRUPT (func_type
)
15082 && !crtl
->tail_call_emit
)
15084 unsigned long mask
;
15085 /* Preserve return values, of any size. */
15086 mask
= (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
15088 mask
&= ~saved_regs_mask
;
15090 while (bit_count (mask
) * 4 > amount
)
15092 while ((mask
& (1 << reg
)) == 0)
15094 mask
&= ~(1 << reg
);
15096 if (bit_count (mask
) * 4 == amount
) {
15098 saved_regs_mask
|= mask
;
15105 operands
[1] = operands
[0];
15106 operands
[2] = GEN_INT (amount
);
15107 output_add_immediate (operands
);
15109 if (frame_pointer_needed
)
15110 asm_fprintf (f
, "\tmov\t%r, %r\n",
15111 SP_REGNUM
, HARD_FRAME_POINTER_REGNUM
);
15114 if (TARGET_FPA_EMU2
)
15116 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
15117 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15118 asm_fprintf (f
, "\tldfe\t%r, [%r], #12\n",
15123 start_reg
= FIRST_FPA_REGNUM
;
15125 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
15127 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15129 if (reg
- start_reg
== 3)
15131 asm_fprintf (f
, "\tlfmfd\t%r, 4, [%r]!\n",
15132 start_reg
, SP_REGNUM
);
15133 start_reg
= reg
+ 1;
15138 if (reg
!= start_reg
)
15139 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
15140 start_reg
, reg
- start_reg
,
15143 start_reg
= reg
+ 1;
15147 /* Just in case the last register checked also needs unstacking. */
15148 if (reg
!= start_reg
)
15149 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
15150 start_reg
, reg
- start_reg
, SP_REGNUM
);
15153 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15155 int end_reg
= LAST_VFP_REGNUM
+ 1;
15157 /* Scan the registers in reverse order. We need to match
15158 any groupings made in the prologue and generate matching
15160 for (reg
= LAST_VFP_REGNUM
- 1; reg
>= FIRST_VFP_REGNUM
; reg
-= 2)
15162 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
15163 && (!df_regs_ever_live_p (reg
+ 1)
15164 || call_used_regs
[reg
+ 1]))
15166 if (end_reg
> reg
+ 2)
15167 vfp_output_fldmd (f
, SP_REGNUM
,
15168 (reg
+ 2 - FIRST_VFP_REGNUM
) / 2,
15169 (end_reg
- (reg
+ 2)) / 2);
15173 if (end_reg
> reg
+ 2)
15174 vfp_output_fldmd (f
, SP_REGNUM
, 0,
15175 (end_reg
- (reg
+ 2)) / 2);
15179 for (reg
= FIRST_IWMMXT_REGNUM
; reg
<= LAST_IWMMXT_REGNUM
; reg
++)
15180 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15181 asm_fprintf (f
, "\twldrd\t%r, [%r], #8\n", reg
, SP_REGNUM
);
15183 /* If we can, restore the LR into the PC. */
15184 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
15185 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
15186 && !IS_STACKALIGN (func_type
)
15188 && crtl
->args
.pretend_args_size
== 0
15189 && saved_regs_mask
& (1 << LR_REGNUM
)
15190 && !crtl
->calls_eh_return
)
15192 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
15193 saved_regs_mask
|= (1 << PC_REGNUM
);
15194 rfe
= IS_INTERRUPT (func_type
);
15199 /* Load the registers off the stack. If we only have one register
15200 to load use the LDR instruction - it is faster. For Thumb-2
15201 always use pop and the assembler will pick the best instruction.*/
15202 if (TARGET_ARM
&& saved_regs_mask
== (1 << LR_REGNUM
)
15203 && !IS_INTERRUPT(func_type
))
15205 asm_fprintf (f
, "\tldr\t%r, [%r], #4\n", LR_REGNUM
, SP_REGNUM
);
15207 else if (saved_regs_mask
)
15209 if (saved_regs_mask
& (1 << SP_REGNUM
))
15210 /* Note - write back to the stack register is not enabled
15211 (i.e. "ldmfd sp!..."). We know that the stack pointer is
15212 in the list of registers and if we add writeback the
15213 instruction becomes UNPREDICTABLE. */
15214 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
,
15216 else if (TARGET_ARM
)
15217 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, saved_regs_mask
,
15220 print_multi_reg (f
, "pop\t", SP_REGNUM
, saved_regs_mask
, 0);
15223 if (crtl
->args
.pretend_args_size
)
15225 /* Unwind the pre-pushed regs. */
15226 operands
[0] = operands
[1] = stack_pointer_rtx
;
15227 operands
[2] = GEN_INT (crtl
->args
.pretend_args_size
);
15228 output_add_immediate (operands
);
15232 /* We may have already restored PC directly from the stack. */
15233 if (!really_return
|| saved_regs_mask
& (1 << PC_REGNUM
))
15236 /* Stack adjustment for exception handler. */
15237 if (crtl
->calls_eh_return
)
15238 asm_fprintf (f
, "\tadd\t%r, %r, %r\n", SP_REGNUM
, SP_REGNUM
,
15239 ARM_EH_STACKADJ_REGNUM
);
15241 /* Generate the return instruction. */
15242 switch ((int) ARM_FUNC_TYPE (func_type
))
15246 asm_fprintf (f
, "\tsubs\t%r, %r, #4\n", PC_REGNUM
, LR_REGNUM
);
15249 case ARM_FT_EXCEPTION
:
15250 asm_fprintf (f
, "\tmovs\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
15253 case ARM_FT_INTERWORKED
:
15254 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
15258 if (IS_STACKALIGN (func_type
))
15260 /* See comment in arm_expand_prologue. */
15261 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, 0);
15263 if (arm_arch5
|| arm_arch4t
)
15264 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
15266 asm_fprintf (f
, "\tmov\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
15274 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
15275 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
15277 arm_stack_offsets
*offsets
;
15283 /* Emit any call-via-reg trampolines that are needed for v4t support
15284 of call_reg and call_value_reg type insns. */
15285 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
15287 rtx label
= cfun
->machine
->call_via
[regno
];
15291 switch_to_section (function_section (current_function_decl
));
15292 targetm
.asm_out
.internal_label (asm_out_file
, "L",
15293 CODE_LABEL_NUMBER (label
));
15294 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
15298 /* ??? Probably not safe to set this here, since it assumes that a
15299 function will be emitted as assembly immediately after we generate
15300 RTL for it. This does not happen for inline functions. */
15301 cfun
->machine
->return_used_this_function
= 0;
15303 else /* TARGET_32BIT */
15305 /* We need to take into account any stack-frame rounding. */
15306 offsets
= arm_get_frame_offsets ();
15308 gcc_assert (!use_return_insn (FALSE
, NULL
)
15309 || (cfun
->machine
->return_used_this_function
!= 0)
15310 || offsets
->saved_regs
== offsets
->outgoing_args
15311 || frame_pointer_needed
);
15313 /* Reset the ARM-specific per-function variables. */
15314 after_arm_reorg
= 0;
15318 /* Generate and emit an insn that we will recognize as a push_multi.
15319 Unfortunately, since this insn does not reflect very well the actual
15320 semantics of the operation, we need to annotate the insn for the benefit
15321 of DWARF2 frame unwind information. */
15323 emit_multi_reg_push (unsigned long mask
)
15326 int num_dwarf_regs
;
15330 int dwarf_par_index
;
15333 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15334 if (mask
& (1 << i
))
15337 gcc_assert (num_regs
&& num_regs
<= 16);
15339 /* We don't record the PC in the dwarf frame information. */
15340 num_dwarf_regs
= num_regs
;
15341 if (mask
& (1 << PC_REGNUM
))
15344 /* For the body of the insn we are going to generate an UNSPEC in
15345 parallel with several USEs. This allows the insn to be recognized
15346 by the push_multi pattern in the arm.md file.
15348 The body of the insn looks something like this:
15351 (set (mem:BLK (pre_modify:SI (reg:SI sp)
15352 (const_int:SI <num>)))
15353 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
15359 For the frame note however, we try to be more explicit and actually
15360 show each register being stored into the stack frame, plus a (single)
15361 decrement of the stack pointer. We do it this way in order to be
15362 friendly to the stack unwinding code, which only wants to see a single
15363 stack decrement per instruction. The RTL we generate for the note looks
15364 something like this:
15367 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
15368 (set (mem:SI (reg:SI sp)) (reg:SI r4))
15369 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
15370 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
15374 FIXME:: In an ideal world the PRE_MODIFY would not exist and
15375 instead we'd have a parallel expression detailing all
15376 the stores to the various memory addresses so that debug
15377 information is more up-to-date. Remember however while writing
15378 this to take care of the constraints with the push instruction.
15380 Note also that this has to be taken care of for the VFP registers.
15382 For more see PR43399. */
15384 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
15385 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
15386 dwarf_par_index
= 1;
15388 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
15390 if (mask
& (1 << i
))
15392 reg
= gen_rtx_REG (SImode
, i
);
15394 XVECEXP (par
, 0, 0)
15395 = gen_rtx_SET (VOIDmode
,
15398 gen_rtx_PRE_MODIFY (Pmode
,
15401 (stack_pointer_rtx
,
15404 gen_rtx_UNSPEC (BLKmode
,
15405 gen_rtvec (1, reg
),
15406 UNSPEC_PUSH_MULT
));
15408 if (i
!= PC_REGNUM
)
15410 tmp
= gen_rtx_SET (VOIDmode
,
15411 gen_frame_mem (SImode
, stack_pointer_rtx
),
15413 RTX_FRAME_RELATED_P (tmp
) = 1;
15414 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
15422 for (j
= 1, i
++; j
< num_regs
; i
++)
15424 if (mask
& (1 << i
))
15426 reg
= gen_rtx_REG (SImode
, i
);
15428 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
15430 if (i
!= PC_REGNUM
)
15433 = gen_rtx_SET (VOIDmode
,
15436 plus_constant (stack_pointer_rtx
,
15439 RTX_FRAME_RELATED_P (tmp
) = 1;
15440 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
15447 par
= emit_insn (par
);
15449 tmp
= gen_rtx_SET (VOIDmode
,
15451 plus_constant (stack_pointer_rtx
, -4 * num_regs
));
15452 RTX_FRAME_RELATED_P (tmp
) = 1;
15453 XVECEXP (dwarf
, 0, 0) = tmp
;
15455 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15460 /* Calculate the size of the return value that is passed in registers. */
15462 arm_size_return_regs (void)
15464 enum machine_mode mode
;
15466 if (crtl
->return_rtx
!= 0)
15467 mode
= GET_MODE (crtl
->return_rtx
);
15469 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
15471 return GET_MODE_SIZE (mode
);
15475 emit_sfm (int base_reg
, int count
)
15482 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
15483 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
15485 reg
= gen_rtx_REG (XFmode
, base_reg
++);
15487 XVECEXP (par
, 0, 0)
15488 = gen_rtx_SET (VOIDmode
,
15491 gen_rtx_PRE_MODIFY (Pmode
,
15494 (stack_pointer_rtx
,
15497 gen_rtx_UNSPEC (BLKmode
,
15498 gen_rtvec (1, reg
),
15499 UNSPEC_PUSH_MULT
));
15500 tmp
= gen_rtx_SET (VOIDmode
,
15501 gen_frame_mem (XFmode
, stack_pointer_rtx
), reg
);
15502 RTX_FRAME_RELATED_P (tmp
) = 1;
15503 XVECEXP (dwarf
, 0, 1) = tmp
;
15505 for (i
= 1; i
< count
; i
++)
15507 reg
= gen_rtx_REG (XFmode
, base_reg
++);
15508 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
15510 tmp
= gen_rtx_SET (VOIDmode
,
15511 gen_frame_mem (XFmode
,
15512 plus_constant (stack_pointer_rtx
,
15515 RTX_FRAME_RELATED_P (tmp
) = 1;
15516 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
15519 tmp
= gen_rtx_SET (VOIDmode
,
15521 plus_constant (stack_pointer_rtx
, -12 * count
));
15523 RTX_FRAME_RELATED_P (tmp
) = 1;
15524 XVECEXP (dwarf
, 0, 0) = tmp
;
15526 par
= emit_insn (par
);
15527 add_reg_note (par
, REG_FRAME_RELATED_EXPR
, dwarf
);
15533 /* Return true if the current function needs to save/restore LR. */
15536 thumb_force_lr_save (void)
15538 return !cfun
->machine
->lr_save_eliminated
15539 && (!leaf_function_p ()
15540 || thumb_far_jump_used_p ()
15541 || df_regs_ever_live_p (LR_REGNUM
));
15545 /* Return true if r3 is used by any of the tail call insns in the
15546 current function. */
15549 any_sibcall_uses_r3 (void)
15554 if (!crtl
->tail_call_emit
)
15556 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR
->preds
)
15557 if (e
->flags
& EDGE_SIBCALL
)
15559 rtx call
= BB_END (e
->src
);
15560 if (!CALL_P (call
))
15561 call
= prev_nonnote_nondebug_insn (call
);
15562 gcc_assert (CALL_P (call
) && SIBLING_CALL_P (call
));
15563 if (find_regno_fusage (call
, USE
, 3))
15570 /* Compute the distance from register FROM to register TO.
15571 These can be the arg pointer (26), the soft frame pointer (25),
15572 the stack pointer (13) or the hard frame pointer (11).
15573 In thumb mode r7 is used as the soft frame pointer, if needed.
15574 Typical stack layout looks like this:
15576 old stack pointer -> | |
15579 | | saved arguments for
15580 | | vararg functions
15583 hard FP & arg pointer -> | | \
15591 soft frame pointer -> | | /
15596 locals base pointer -> | | /
15601 current stack pointer -> | | /
15604 For a given function some or all of these stack components
15605 may not be needed, giving rise to the possibility of
15606 eliminating some of the registers.
15608 The values returned by this function must reflect the behavior
15609 of arm_expand_prologue() and arm_compute_save_reg_mask().
15611 The sign of the number returned reflects the direction of stack
15612 growth, so the values are positive for all eliminations except
15613 from the soft frame pointer to the hard frame pointer.
15615 SFP may point just inside the local variables block to ensure correct
15619 /* Calculate stack offsets. These are used to calculate register elimination
15620 offsets and in prologue/epilogue code. Also calculates which registers
15621 should be saved. */
15623 static arm_stack_offsets
*
15624 arm_get_frame_offsets (void)
15626 struct arm_stack_offsets
*offsets
;
15627 unsigned long func_type
;
15631 HOST_WIDE_INT frame_size
;
15634 offsets
= &cfun
->machine
->stack_offsets
;
15636 /* We need to know if we are a leaf function. Unfortunately, it
15637 is possible to be called after start_sequence has been called,
15638 which causes get_insns to return the insns for the sequence,
15639 not the function, which will cause leaf_function_p to return
15640 the incorrect result.
15642 to know about leaf functions once reload has completed, and the
15643 frame size cannot be changed after that time, so we can safely
15644 use the cached value. */
15646 if (reload_completed
)
15649 /* Initially this is the size of the local variables. It will translated
15650 into an offset once we have determined the size of preceding data. */
15651 frame_size
= ROUND_UP_WORD (get_frame_size ());
15653 leaf
= leaf_function_p ();
15655 /* Space for variadic functions. */
15656 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
15658 /* In Thumb mode this is incorrect, but never used. */
15659 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0) +
15660 arm_compute_static_chain_stack_bytes();
15664 unsigned int regno
;
15666 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
15667 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15668 saved
= core_saved
;
15670 /* We know that SP will be doubleword aligned on entry, and we must
15671 preserve that condition at any subroutine call. We also require the
15672 soft frame pointer to be doubleword aligned. */
15674 if (TARGET_REALLY_IWMMXT
)
15676 /* Check for the call-saved iWMMXt registers. */
15677 for (regno
= FIRST_IWMMXT_REGNUM
;
15678 regno
<= LAST_IWMMXT_REGNUM
;
15680 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15684 func_type
= arm_current_func_type ();
15685 if (! IS_VOLATILE (func_type
))
15687 /* Space for saved FPA registers. */
15688 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
15689 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
15692 /* Space for saved VFP registers. */
15693 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15694 saved
+= arm_get_vfp_saved_size ();
15697 else /* TARGET_THUMB1 */
15699 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
15700 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
15701 saved
= core_saved
;
15702 if (TARGET_BACKTRACE
)
15706 /* Saved registers include the stack frame. */
15707 offsets
->saved_regs
= offsets
->saved_args
+ saved
+
15708 arm_compute_static_chain_stack_bytes();
15709 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
15710 /* A leaf function does not need any stack alignment if it has nothing
15712 if (leaf
&& frame_size
== 0
15713 /* However if it calls alloca(), we have a dynamically allocated
15714 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
15715 && ! cfun
->calls_alloca
)
15717 offsets
->outgoing_args
= offsets
->soft_frame
;
15718 offsets
->locals_base
= offsets
->soft_frame
;
15722 /* Ensure SFP has the correct alignment. */
15723 if (ARM_DOUBLEWORD_ALIGN
15724 && (offsets
->soft_frame
& 7))
15726 offsets
->soft_frame
+= 4;
15727 /* Try to align stack by pushing an extra reg. Don't bother doing this
15728 when there is a stack frame as the alignment will be rolled into
15729 the normal stack adjustment. */
15730 if (frame_size
+ crtl
->outgoing_args_size
== 0)
15734 /* If it is safe to use r3, then do so. This sometimes
15735 generates better code on Thumb-2 by avoiding the need to
15736 use 32-bit push/pop instructions. */
15737 if (! any_sibcall_uses_r3 ()
15738 && arm_size_return_regs () <= 12
15739 && (offsets
->saved_regs_mask
& (1 << 3)) == 0)
15744 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
15746 if ((offsets
->saved_regs_mask
& (1 << i
)) == 0)
15755 offsets
->saved_regs
+= 4;
15756 offsets
->saved_regs_mask
|= (1 << reg
);
15761 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
15762 offsets
->outgoing_args
= (offsets
->locals_base
15763 + crtl
->outgoing_args_size
);
15765 if (ARM_DOUBLEWORD_ALIGN
)
15767 /* Ensure SP remains doubleword aligned. */
15768 if (offsets
->outgoing_args
& 7)
15769 offsets
->outgoing_args
+= 4;
15770 gcc_assert (!(offsets
->outgoing_args
& 7));
15777 /* Calculate the relative offsets for the different stack pointers. Positive
15778 offsets are in the direction of stack growth. */
15781 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
15783 arm_stack_offsets
*offsets
;
15785 offsets
= arm_get_frame_offsets ();
15787 /* OK, now we have enough information to compute the distances.
15788 There must be an entry in these switch tables for each pair
15789 of registers in ELIMINABLE_REGS, even if some of the entries
15790 seem to be redundant or useless. */
15793 case ARG_POINTER_REGNUM
:
15796 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15799 case FRAME_POINTER_REGNUM
:
15800 /* This is the reverse of the soft frame pointer
15801 to hard frame pointer elimination below. */
15802 return offsets
->soft_frame
- offsets
->saved_args
;
15804 case ARM_HARD_FRAME_POINTER_REGNUM
:
15805 /* This is only non-zero in the case where the static chain register
15806 is stored above the frame. */
15807 return offsets
->frame
- offsets
->saved_args
- 4;
15809 case STACK_POINTER_REGNUM
:
15810 /* If nothing has been pushed on the stack at all
15811 then this will return -4. This *is* correct! */
15812 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
15815 gcc_unreachable ();
15817 gcc_unreachable ();
15819 case FRAME_POINTER_REGNUM
:
15822 case THUMB_HARD_FRAME_POINTER_REGNUM
:
15825 case ARM_HARD_FRAME_POINTER_REGNUM
:
15826 /* The hard frame pointer points to the top entry in the
15827 stack frame. The soft frame pointer to the bottom entry
15828 in the stack frame. If there is no stack frame at all,
15829 then they are identical. */
15831 return offsets
->frame
- offsets
->soft_frame
;
15833 case STACK_POINTER_REGNUM
:
15834 return offsets
->outgoing_args
- offsets
->soft_frame
;
15837 gcc_unreachable ();
15839 gcc_unreachable ();
15842 /* You cannot eliminate from the stack pointer.
15843 In theory you could eliminate from the hard frame
15844 pointer to the stack pointer, but this will never
15845 happen, since if a stack frame is not needed the
15846 hard frame pointer will never be used. */
15847 gcc_unreachable ();
15851 /* Given FROM and TO register numbers, say whether this elimination is
15852 allowed. Frame pointer elimination is automatically handled.
15854 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
15855 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
15856 pointer, we must eliminate FRAME_POINTER_REGNUM into
15857 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
15858 ARG_POINTER_REGNUM. */
15861 arm_can_eliminate (const int from
, const int to
)
15863 return ((to
== FRAME_POINTER_REGNUM
&& from
== ARG_POINTER_REGNUM
) ? false :
15864 (to
== STACK_POINTER_REGNUM
&& frame_pointer_needed
) ? false :
15865 (to
== ARM_HARD_FRAME_POINTER_REGNUM
&& TARGET_THUMB
) ? false :
15866 (to
== THUMB_HARD_FRAME_POINTER_REGNUM
&& TARGET_ARM
) ? false :
15870 /* Emit RTL to save coprocessor registers on function entry. Returns the
15871 number of bytes pushed. */
15874 arm_save_coproc_regs(void)
15876 int saved_size
= 0;
15878 unsigned start_reg
;
15881 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
15882 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
15884 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15885 insn
= gen_rtx_MEM (V2SImode
, insn
);
15886 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
15887 RTX_FRAME_RELATED_P (insn
) = 1;
15891 /* Save any floating point call-saved registers used by this
15893 if (TARGET_FPA_EMU2
)
15895 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15896 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15898 insn
= gen_rtx_PRE_DEC (Pmode
, stack_pointer_rtx
);
15899 insn
= gen_rtx_MEM (XFmode
, insn
);
15900 insn
= emit_set_insn (insn
, gen_rtx_REG (XFmode
, reg
));
15901 RTX_FRAME_RELATED_P (insn
) = 1;
15907 start_reg
= LAST_FPA_REGNUM
;
15909 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
15911 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
15913 if (start_reg
- reg
== 3)
15915 insn
= emit_sfm (reg
, 4);
15916 RTX_FRAME_RELATED_P (insn
) = 1;
15918 start_reg
= reg
- 1;
15923 if (start_reg
!= reg
)
15925 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15926 RTX_FRAME_RELATED_P (insn
) = 1;
15927 saved_size
+= (start_reg
- reg
) * 12;
15929 start_reg
= reg
- 1;
15933 if (start_reg
!= reg
)
15935 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
15936 saved_size
+= (start_reg
- reg
) * 12;
15937 RTX_FRAME_RELATED_P (insn
) = 1;
15940 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
15942 start_reg
= FIRST_VFP_REGNUM
;
15944 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
15946 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
15947 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
15949 if (start_reg
!= reg
)
15950 saved_size
+= vfp_emit_fstmd (start_reg
,
15951 (reg
- start_reg
) / 2);
15952 start_reg
= reg
+ 2;
15955 if (start_reg
!= reg
)
15956 saved_size
+= vfp_emit_fstmd (start_reg
,
15957 (reg
- start_reg
) / 2);
15963 /* Set the Thumb frame pointer from the stack pointer. */
15966 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
15968 HOST_WIDE_INT amount
;
15971 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
15973 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15974 stack_pointer_rtx
, GEN_INT (amount
)));
15977 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
15978 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
15979 expects the first two operands to be the same. */
15982 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15984 hard_frame_pointer_rtx
));
15988 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
15989 hard_frame_pointer_rtx
,
15990 stack_pointer_rtx
));
15992 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
15993 plus_constant (stack_pointer_rtx
, amount
));
15994 RTX_FRAME_RELATED_P (dwarf
) = 1;
15995 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
15998 RTX_FRAME_RELATED_P (insn
) = 1;
16001 /* Generate the prologue instructions for entry into an ARM or Thumb-2
16004 arm_expand_prologue (void)
16009 unsigned long live_regs_mask
;
16010 unsigned long func_type
;
16012 int saved_pretend_args
= 0;
16013 int saved_regs
= 0;
16014 unsigned HOST_WIDE_INT args_to_push
;
16015 arm_stack_offsets
*offsets
;
16017 func_type
= arm_current_func_type ();
16019 /* Naked functions don't have prologues. */
16020 if (IS_NAKED (func_type
))
16023 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
16024 args_to_push
= crtl
->args
.pretend_args_size
;
16026 /* Compute which register we will have to save onto the stack. */
16027 offsets
= arm_get_frame_offsets ();
16028 live_regs_mask
= offsets
->saved_regs_mask
;
16030 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
16032 if (IS_STACKALIGN (func_type
))
16036 /* Handle a word-aligned stack pointer. We generate the following:
16041 <save and restore r0 in normal prologue/epilogue>
16045 The unwinder doesn't need to know about the stack realignment.
16046 Just tell it we saved SP in r0. */
16047 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
16049 r0
= gen_rtx_REG (SImode
, 0);
16050 r1
= gen_rtx_REG (SImode
, 1);
16052 insn
= emit_insn (gen_movsi (r0
, stack_pointer_rtx
));
16053 RTX_FRAME_RELATED_P (insn
) = 1;
16054 add_reg_note (insn
, REG_CFA_REGISTER
, NULL
);
16056 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
16058 /* ??? The CFA changes here, which may cause GDB to conclude that it
16059 has entered a different function. That said, the unwind info is
16060 correct, individually, before and after this instruction because
16061 we've described the save of SP, which will override the default
16062 handling of SP as restoring from the CFA. */
16063 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
16066 /* For APCS frames, if IP register is clobbered
16067 when creating frame, save that register in a special
16069 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
16071 if (IS_INTERRUPT (func_type
))
16073 /* Interrupt functions must not corrupt any registers.
16074 Creating a frame pointer however, corrupts the IP
16075 register, so we must push it first. */
16076 emit_multi_reg_push (1 << IP_REGNUM
);
16078 /* Do not set RTX_FRAME_RELATED_P on this insn.
16079 The dwarf stack unwinding code only wants to see one
16080 stack decrement per function, and this is not it. If
16081 this instruction is labeled as being part of the frame
16082 creation sequence then dwarf2out_frame_debug_expr will
16083 die when it encounters the assignment of IP to FP
16084 later on, since the use of SP here establishes SP as
16085 the CFA register and not IP.
16087 Anyway this instruction is not really part of the stack
16088 frame creation although it is part of the prologue. */
16090 else if (IS_NESTED (func_type
))
16092 /* The Static chain register is the same as the IP register
16093 used as a scratch register during stack frame creation.
16094 To get around this need to find somewhere to store IP
16095 whilst the frame is being created. We try the following
16098 1. The last argument register.
16099 2. A slot on the stack above the frame. (This only
16100 works if the function is not a varargs function).
16101 3. Register r3, after pushing the argument registers
16104 Note - we only need to tell the dwarf2 backend about the SP
16105 adjustment in the second variant; the static chain register
16106 doesn't need to be unwound, as it doesn't contain a value
16107 inherited from the caller. */
16109 if (df_regs_ever_live_p (3) == false)
16110 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
16111 else if (args_to_push
== 0)
16115 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
16118 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
16119 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
16122 /* Just tell the dwarf backend that we adjusted SP. */
16123 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
16124 plus_constant (stack_pointer_rtx
,
16126 RTX_FRAME_RELATED_P (insn
) = 1;
16127 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
16131 /* Store the args on the stack. */
16132 if (cfun
->machine
->uses_anonymous_args
)
16133 insn
= emit_multi_reg_push
16134 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
16137 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16138 GEN_INT (- args_to_push
)));
16140 RTX_FRAME_RELATED_P (insn
) = 1;
16142 saved_pretend_args
= 1;
16143 fp_offset
= args_to_push
;
16146 /* Now reuse r3 to preserve IP. */
16147 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
16151 insn
= emit_set_insn (ip_rtx
,
16152 plus_constant (stack_pointer_rtx
, fp_offset
));
16153 RTX_FRAME_RELATED_P (insn
) = 1;
16158 /* Push the argument registers, or reserve space for them. */
16159 if (cfun
->machine
->uses_anonymous_args
)
16160 insn
= emit_multi_reg_push
16161 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
16164 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16165 GEN_INT (- args_to_push
)));
16166 RTX_FRAME_RELATED_P (insn
) = 1;
16169 /* If this is an interrupt service routine, and the link register
16170 is going to be pushed, and we're not generating extra
16171 push of IP (needed when frame is needed and frame layout if apcs),
16172 subtracting four from LR now will mean that the function return
16173 can be done with a single instruction. */
16174 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
16175 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
16176 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
16179 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
16181 emit_set_insn (lr
, plus_constant (lr
, -4));
16184 if (live_regs_mask
)
16186 saved_regs
+= bit_count (live_regs_mask
) * 4;
16187 if (optimize_size
&& !frame_pointer_needed
16188 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
16190 /* If no coprocessor registers are being pushed and we don't have
16191 to worry about a frame pointer then push extra registers to
16192 create the stack frame. This is done is a way that does not
16193 alter the frame layout, so is independent of the epilogue. */
16197 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
16199 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
16200 if (frame
&& n
* 4 >= frame
)
16203 live_regs_mask
|= (1 << n
) - 1;
16204 saved_regs
+= frame
;
16207 insn
= emit_multi_reg_push (live_regs_mask
);
16208 RTX_FRAME_RELATED_P (insn
) = 1;
16211 if (! IS_VOLATILE (func_type
))
16212 saved_regs
+= arm_save_coproc_regs ();
16214 if (frame_pointer_needed
&& TARGET_ARM
)
16216 /* Create the new frame pointer. */
16217 if (TARGET_APCS_FRAME
)
16219 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
16220 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
16221 RTX_FRAME_RELATED_P (insn
) = 1;
16223 if (IS_NESTED (func_type
))
16225 /* Recover the static chain register. */
16226 if (!df_regs_ever_live_p (3)
16227 || saved_pretend_args
)
16228 insn
= gen_rtx_REG (SImode
, 3);
16229 else /* if (crtl->args.pretend_args_size == 0) */
16231 insn
= plus_constant (hard_frame_pointer_rtx
, 4);
16232 insn
= gen_frame_mem (SImode
, insn
);
16234 emit_set_insn (ip_rtx
, insn
);
16235 /* Add a USE to stop propagate_one_insn() from barfing. */
16236 emit_insn (gen_prologue_use (ip_rtx
));
16241 insn
= GEN_INT (saved_regs
- 4);
16242 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
16243 stack_pointer_rtx
, insn
));
16244 RTX_FRAME_RELATED_P (insn
) = 1;
16248 if (flag_stack_usage_info
)
16249 current_function_static_stack_size
16250 = offsets
->outgoing_args
- offsets
->saved_args
;
16252 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
16254 /* This add can produce multiple insns for a large constant, so we
16255 need to get tricky. */
16256 rtx last
= get_last_insn ();
16258 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
16259 - offsets
->outgoing_args
);
16261 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16265 last
= last
? NEXT_INSN (last
) : get_insns ();
16266 RTX_FRAME_RELATED_P (last
) = 1;
16268 while (last
!= insn
);
16270 /* If the frame pointer is needed, emit a special barrier that
16271 will prevent the scheduler from moving stores to the frame
16272 before the stack adjustment. */
16273 if (frame_pointer_needed
)
16274 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
16275 hard_frame_pointer_rtx
));
16279 if (frame_pointer_needed
&& TARGET_THUMB2
)
16280 thumb_set_frame_pointer (offsets
);
16282 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
16284 unsigned long mask
;
16286 mask
= live_regs_mask
;
16287 mask
&= THUMB2_WORK_REGS
;
16288 if (!IS_NESTED (func_type
))
16289 mask
|= (1 << IP_REGNUM
);
16290 arm_load_pic_register (mask
);
16293 /* If we are profiling, make sure no instructions are scheduled before
16294 the call to mcount. Similarly if the user has requested no
16295 scheduling in the prolog. Similarly if we want non-call exceptions
16296 using the EABI unwinder, to prevent faulting instructions from being
16297 swapped with a stack adjustment. */
16298 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
16299 || (arm_except_unwind_info (&global_options
) == UI_TARGET
16300 && cfun
->can_throw_non_call_exceptions
))
16301 emit_insn (gen_blockage ());
16303 /* If the link register is being kept alive, with the return address in it,
16304 then make sure that it does not get reused by the ce2 pass. */
16305 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
16306 cfun
->machine
->lr_save_eliminated
= 1;
16309 /* Print condition code to STREAM. Helper function for arm_print_operand. */
16311 arm_print_condition (FILE *stream
)
16313 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
16315 /* Branch conversion is not implemented for Thumb-2. */
16318 output_operand_lossage ("predicated Thumb instruction");
16321 if (current_insn_predicate
!= NULL
)
16323 output_operand_lossage
16324 ("predicated instruction in conditional sequence");
16328 fputs (arm_condition_codes
[arm_current_cc
], stream
);
16330 else if (current_insn_predicate
)
16332 enum arm_cond_code code
;
16336 output_operand_lossage ("predicated Thumb instruction");
16340 code
= get_arm_condition_code (current_insn_predicate
);
16341 fputs (arm_condition_codes
[code
], stream
);
16346 /* If CODE is 'd', then the X is a condition operand and the instruction
16347 should only be executed if the condition is true.
16348 if CODE is 'D', then the X is a condition operand and the instruction
16349 should only be executed if the condition is false: however, if the mode
16350 of the comparison is CCFPEmode, then always execute the instruction -- we
16351 do this because in these circumstances !GE does not necessarily imply LT;
16352 in these cases the instruction pattern will take care to make sure that
16353 an instruction containing %d will follow, thereby undoing the effects of
16354 doing this instruction unconditionally.
16355 If CODE is 'N' then X is a floating point operand that must be negated
16357 If CODE is 'B' then output a bitwise inverted value of X (a const int).
16358 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
16360 arm_print_operand (FILE *stream
, rtx x
, int code
)
16365 fputs (ASM_COMMENT_START
, stream
);
16369 fputs (user_label_prefix
, stream
);
16373 fputs (REGISTER_PREFIX
, stream
);
16377 arm_print_condition (stream
);
16381 /* Nothing in unified syntax, otherwise the current condition code. */
16382 if (!TARGET_UNIFIED_ASM
)
16383 arm_print_condition (stream
);
16387 /* The current condition code in unified syntax, otherwise nothing. */
16388 if (TARGET_UNIFIED_ASM
)
16389 arm_print_condition (stream
);
16393 /* The current condition code for a condition code setting instruction.
16394 Preceded by 's' in unified syntax, otherwise followed by 's'. */
16395 if (TARGET_UNIFIED_ASM
)
16397 fputc('s', stream
);
16398 arm_print_condition (stream
);
16402 arm_print_condition (stream
);
16403 fputc('s', stream
);
16408 /* If the instruction is conditionally executed then print
16409 the current condition code, otherwise print 's'. */
16410 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
16411 if (current_insn_predicate
)
16412 arm_print_condition (stream
);
16414 fputc('s', stream
);
16417 /* %# is a "break" sequence. It doesn't output anything, but is used to
16418 separate e.g. operand numbers from following text, if that text consists
16419 of further digits which we don't want to be part of the operand
16427 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
16428 r
= real_value_negate (&r
);
16429 fprintf (stream
, "%s", fp_const_from_val (&r
));
16433 /* An integer or symbol address without a preceding # sign. */
16435 switch (GET_CODE (x
))
16438 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
16442 output_addr_const (stream
, x
);
16446 if (GET_CODE (XEXP (x
, 0)) == PLUS
16447 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
16449 output_addr_const (stream
, x
);
16452 /* Fall through. */
16455 output_operand_lossage ("Unsupported operand for code '%c'", code
);
16460 if (GET_CODE (x
) == CONST_INT
)
16463 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
16464 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
16468 putc ('~', stream
);
16469 output_addr_const (stream
, x
);
16474 /* The low 16 bits of an immediate constant. */
16475 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
16479 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
16482 /* Truncate Cirrus shift counts. */
16484 if (GET_CODE (x
) == CONST_INT
)
16486 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 0x3f);
16489 arm_print_operand (stream
, x
, 0);
16493 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
16501 if (!shift_operator (x
, SImode
))
16503 output_operand_lossage ("invalid shift operand");
16507 shift
= shift_op (x
, &val
);
16511 fprintf (stream
, ", %s ", shift
);
16513 arm_print_operand (stream
, XEXP (x
, 1), 0);
16515 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
16520 /* An explanation of the 'Q', 'R' and 'H' register operands:
16522 In a pair of registers containing a DI or DF value the 'Q'
16523 operand returns the register number of the register containing
16524 the least significant part of the value. The 'R' operand returns
16525 the register number of the register containing the most
16526 significant part of the value.
16528 The 'H' operand returns the higher of the two register numbers.
16529 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
16530 same as the 'Q' operand, since the most significant part of the
16531 value is held in the lower number register. The reverse is true
16532 on systems where WORDS_BIG_ENDIAN is false.
16534 The purpose of these operands is to distinguish between cases
16535 where the endian-ness of the values is important (for example
16536 when they are added together), and cases where the endian-ness
16537 is irrelevant, but the order of register operations is important.
16538 For example when loading a value from memory into a register
16539 pair, the endian-ness does not matter. Provided that the value
16540 from the lower memory address is put into the lower numbered
16541 register, and the value from the higher address is put into the
16542 higher numbered register, the load will work regardless of whether
16543 the value being loaded is big-wordian or little-wordian. The
16544 order of the two register loads can matter however, if the address
16545 of the memory location is actually held in one of the registers
16546 being overwritten by the load.
16548 The 'Q' and 'R' constraints are also available for 64-bit
16551 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
16553 rtx part
= gen_lowpart (SImode
, x
);
16554 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
16558 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16560 output_operand_lossage ("invalid operand for code '%c'", code
);
16564 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
16568 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
16570 enum machine_mode mode
= GET_MODE (x
);
16573 if (mode
== VOIDmode
)
16575 part
= gen_highpart_mode (SImode
, mode
, x
);
16576 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, INTVAL (part
));
16580 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16582 output_operand_lossage ("invalid operand for code '%c'", code
);
16586 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
16590 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16592 output_operand_lossage ("invalid operand for code '%c'", code
);
16596 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
16600 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16602 output_operand_lossage ("invalid operand for code '%c'", code
);
16606 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
16610 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
16612 output_operand_lossage ("invalid operand for code '%c'", code
);
16616 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
16620 asm_fprintf (stream
, "%r",
16621 GET_CODE (XEXP (x
, 0)) == REG
16622 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
16626 asm_fprintf (stream
, "{%r-%r}",
16628 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
16631 /* Like 'M', but writing doubleword vector registers, for use by Neon
16635 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
16636 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
16638 asm_fprintf (stream
, "{d%d}", regno
);
16640 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
16645 /* CONST_TRUE_RTX means always -- that's the default. */
16646 if (x
== const_true_rtx
)
16649 if (!COMPARISON_P (x
))
16651 output_operand_lossage ("invalid operand for code '%c'", code
);
16655 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
16660 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
16661 want to do that. */
16662 if (x
== const_true_rtx
)
16664 output_operand_lossage ("instruction never executed");
16667 if (!COMPARISON_P (x
))
16669 output_operand_lossage ("invalid operand for code '%c'", code
);
16673 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
16674 (get_arm_condition_code (x
))],
16678 /* Cirrus registers can be accessed in a variety of ways:
16679 single floating point (f)
16680 double floating point (d)
16682 64bit integer (dx). */
16683 case 'W': /* Cirrus register in F mode. */
16684 case 'X': /* Cirrus register in D mode. */
16685 case 'Y': /* Cirrus register in FX mode. */
16686 case 'Z': /* Cirrus register in DX mode. */
16687 gcc_assert (GET_CODE (x
) == REG
16688 && REGNO_REG_CLASS (REGNO (x
)) == CIRRUS_REGS
);
16690 fprintf (stream
, "mv%s%s",
16692 : code
== 'X' ? "d"
16693 : code
== 'Y' ? "fx" : "dx", reg_names
[REGNO (x
)] + 2);
16697 /* Print cirrus register in the mode specified by the register's mode. */
16700 int mode
= GET_MODE (x
);
16702 if (GET_CODE (x
) != REG
|| REGNO_REG_CLASS (REGNO (x
)) != CIRRUS_REGS
)
16704 output_operand_lossage ("invalid operand for code '%c'", code
);
16708 fprintf (stream
, "mv%s%s",
16709 mode
== DFmode
? "d"
16710 : mode
== SImode
? "fx"
16711 : mode
== DImode
? "dx"
16712 : "f", reg_names
[REGNO (x
)] + 2);
16718 if (GET_CODE (x
) != REG
16719 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
16720 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
16721 /* Bad value for wCG register number. */
16723 output_operand_lossage ("invalid operand for code '%c'", code
);
16728 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
16731 /* Print an iWMMXt control register name. */
16733 if (GET_CODE (x
) != CONST_INT
16735 || INTVAL (x
) >= 16)
16736 /* Bad value for wC register number. */
16738 output_operand_lossage ("invalid operand for code '%c'", code
);
16744 static const char * wc_reg_names
[16] =
16746 "wCID", "wCon", "wCSSF", "wCASF",
16747 "wC4", "wC5", "wC6", "wC7",
16748 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
16749 "wC12", "wC13", "wC14", "wC15"
16752 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
16756 /* Print the high single-precision register of a VFP double-precision
16760 int mode
= GET_MODE (x
);
16763 if (GET_MODE_SIZE (mode
) != 8 || GET_CODE (x
) != REG
)
16765 output_operand_lossage ("invalid operand for code '%c'", code
);
16770 if (!VFP_REGNO_OK_FOR_DOUBLE (regno
))
16772 output_operand_lossage ("invalid operand for code '%c'", code
);
16776 fprintf (stream
, "s%d", regno
- FIRST_VFP_REGNUM
+ 1);
16780 /* Print a VFP/Neon double precision or quad precision register name. */
16784 int mode
= GET_MODE (x
);
16785 int is_quad
= (code
== 'q');
16788 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
16790 output_operand_lossage ("invalid operand for code '%c'", code
);
16794 if (GET_CODE (x
) != REG
16795 || !IS_VFP_REGNUM (REGNO (x
)))
16797 output_operand_lossage ("invalid operand for code '%c'", code
);
16802 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
16803 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
16805 output_operand_lossage ("invalid operand for code '%c'", code
);
16809 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
16810 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
16814 /* These two codes print the low/high doubleword register of a Neon quad
16815 register, respectively. For pair-structure types, can also print
16816 low/high quadword registers. */
16820 int mode
= GET_MODE (x
);
16823 if ((GET_MODE_SIZE (mode
) != 16
16824 && GET_MODE_SIZE (mode
) != 32) || GET_CODE (x
) != REG
)
16826 output_operand_lossage ("invalid operand for code '%c'", code
);
16831 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
16833 output_operand_lossage ("invalid operand for code '%c'", code
);
16837 if (GET_MODE_SIZE (mode
) == 16)
16838 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
16839 + (code
== 'f' ? 1 : 0));
16841 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
16842 + (code
== 'f' ? 1 : 0));
16846 /* Print a VFPv3 floating-point constant, represented as an integer
16850 int index
= vfp3_const_double_index (x
);
16851 gcc_assert (index
!= -1);
16852 fprintf (stream
, "%d", index
);
16856 /* Print bits representing opcode features for Neon.
16858 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
16859 and polynomials as unsigned.
16861 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
16863 Bit 2 is 1 for rounding functions, 0 otherwise. */
16865 /* Identify the type as 's', 'u', 'p' or 'f'. */
16868 HOST_WIDE_INT bits
= INTVAL (x
);
16869 fputc ("uspf"[bits
& 3], stream
);
16873 /* Likewise, but signed and unsigned integers are both 'i'. */
16876 HOST_WIDE_INT bits
= INTVAL (x
);
16877 fputc ("iipf"[bits
& 3], stream
);
16881 /* As for 'T', but emit 'u' instead of 'p'. */
16884 HOST_WIDE_INT bits
= INTVAL (x
);
16885 fputc ("usuf"[bits
& 3], stream
);
16889 /* Bit 2: rounding (vs none). */
16892 HOST_WIDE_INT bits
= INTVAL (x
);
16893 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
16897 /* Memory operand for vld1/vst1 instruction. */
16901 bool postinc
= FALSE
;
16902 unsigned align
, memsize
, align_bits
;
16904 gcc_assert (GET_CODE (x
) == MEM
);
16905 addr
= XEXP (x
, 0);
16906 if (GET_CODE (addr
) == POST_INC
)
16909 addr
= XEXP (addr
, 0);
16911 asm_fprintf (stream
, "[%r", REGNO (addr
));
16913 /* We know the alignment of this access, so we can emit a hint in the
16914 instruction (for some alignments) as an aid to the memory subsystem
16916 align
= MEM_ALIGN (x
) >> 3;
16917 memsize
= MEM_SIZE (x
);
16919 /* Only certain alignment specifiers are supported by the hardware. */
16920 if (memsize
== 16 && (align
% 32) == 0)
16922 else if ((memsize
== 8 || memsize
== 16) && (align
% 16) == 0)
16924 else if ((align
% 8) == 0)
16929 if (align_bits
!= 0)
16930 asm_fprintf (stream
, ":%d", align_bits
);
16932 asm_fprintf (stream
, "]");
16935 fputs("!", stream
);
16943 gcc_assert (GET_CODE (x
) == MEM
);
16944 addr
= XEXP (x
, 0);
16945 gcc_assert (GET_CODE (addr
) == REG
);
16946 asm_fprintf (stream
, "[%r]", REGNO (addr
));
16950 /* Translate an S register number into a D register number and element index. */
16953 int mode
= GET_MODE (x
);
16956 if (GET_MODE_SIZE (mode
) != 4 || GET_CODE (x
) != REG
)
16958 output_operand_lossage ("invalid operand for code '%c'", code
);
16963 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16965 output_operand_lossage ("invalid operand for code '%c'", code
);
16969 regno
= regno
- FIRST_VFP_REGNUM
;
16970 fprintf (stream
, "d%d[%d]", regno
/ 2, regno
% 2);
16974 /* Register specifier for vld1.16/vst1.16. Translate the S register
16975 number into a D register number and element index. */
16978 int mode
= GET_MODE (x
);
16981 if (GET_MODE_SIZE (mode
) != 2 || GET_CODE (x
) != REG
)
16983 output_operand_lossage ("invalid operand for code '%c'", code
);
16988 if (!VFP_REGNO_OK_FOR_SINGLE (regno
))
16990 output_operand_lossage ("invalid operand for code '%c'", code
);
16994 regno
= regno
- FIRST_VFP_REGNUM
;
16995 fprintf (stream
, "d%d[%d]", regno
/2, ((regno
% 2) ? 2 : 0));
17002 output_operand_lossage ("missing operand");
17006 switch (GET_CODE (x
))
17009 asm_fprintf (stream
, "%r", REGNO (x
));
17013 output_memory_reference_mode
= GET_MODE (x
);
17014 output_address (XEXP (x
, 0));
17021 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
17022 sizeof (fpstr
), 0, 1);
17023 fprintf (stream
, "#%s", fpstr
);
17026 fprintf (stream
, "#%s", fp_immediate_constant (x
));
17030 gcc_assert (GET_CODE (x
) != NEG
);
17031 fputc ('#', stream
);
17032 if (GET_CODE (x
) == HIGH
)
17034 fputs (":lower16:", stream
);
17038 output_addr_const (stream
, x
);
17044 /* Target hook for printing a memory address. */
17046 arm_print_operand_address (FILE *stream
, rtx x
)
17050 int is_minus
= GET_CODE (x
) == MINUS
;
17052 if (GET_CODE (x
) == REG
)
17053 asm_fprintf (stream
, "[%r, #0]", REGNO (x
));
17054 else if (GET_CODE (x
) == PLUS
|| is_minus
)
17056 rtx base
= XEXP (x
, 0);
17057 rtx index
= XEXP (x
, 1);
17058 HOST_WIDE_INT offset
= 0;
17059 if (GET_CODE (base
) != REG
17060 || (GET_CODE (index
) == REG
&& REGNO (index
) == SP_REGNUM
))
17062 /* Ensure that BASE is a register. */
17063 /* (one of them must be). */
17064 /* Also ensure the SP is not used as in index register. */
17069 switch (GET_CODE (index
))
17072 offset
= INTVAL (index
);
17075 asm_fprintf (stream
, "[%r, #%wd]",
17076 REGNO (base
), offset
);
17080 asm_fprintf (stream
, "[%r, %s%r]",
17081 REGNO (base
), is_minus
? "-" : "",
17091 asm_fprintf (stream
, "[%r, %s%r",
17092 REGNO (base
), is_minus
? "-" : "",
17093 REGNO (XEXP (index
, 0)));
17094 arm_print_operand (stream
, index
, 'S');
17095 fputs ("]", stream
);
17100 gcc_unreachable ();
17103 else if (GET_CODE (x
) == PRE_INC
|| GET_CODE (x
) == POST_INC
17104 || GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == POST_DEC
)
17106 extern enum machine_mode output_memory_reference_mode
;
17108 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
17110 if (GET_CODE (x
) == PRE_DEC
|| GET_CODE (x
) == PRE_INC
)
17111 asm_fprintf (stream
, "[%r, #%s%d]!",
17112 REGNO (XEXP (x
, 0)),
17113 GET_CODE (x
) == PRE_DEC
? "-" : "",
17114 GET_MODE_SIZE (output_memory_reference_mode
));
17116 asm_fprintf (stream
, "[%r], #%s%d",
17117 REGNO (XEXP (x
, 0)),
17118 GET_CODE (x
) == POST_DEC
? "-" : "",
17119 GET_MODE_SIZE (output_memory_reference_mode
));
17121 else if (GET_CODE (x
) == PRE_MODIFY
)
17123 asm_fprintf (stream
, "[%r, ", REGNO (XEXP (x
, 0)));
17124 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
17125 asm_fprintf (stream
, "#%wd]!",
17126 INTVAL (XEXP (XEXP (x
, 1), 1)));
17128 asm_fprintf (stream
, "%r]!",
17129 REGNO (XEXP (XEXP (x
, 1), 1)));
17131 else if (GET_CODE (x
) == POST_MODIFY
)
17133 asm_fprintf (stream
, "[%r], ", REGNO (XEXP (x
, 0)));
17134 if (GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
17135 asm_fprintf (stream
, "#%wd",
17136 INTVAL (XEXP (XEXP (x
, 1), 1)));
17138 asm_fprintf (stream
, "%r",
17139 REGNO (XEXP (XEXP (x
, 1), 1)));
17141 else output_addr_const (stream
, x
);
17145 if (GET_CODE (x
) == REG
)
17146 asm_fprintf (stream
, "[%r]", REGNO (x
));
17147 else if (GET_CODE (x
) == POST_INC
)
17148 asm_fprintf (stream
, "%r!", REGNO (XEXP (x
, 0)));
17149 else if (GET_CODE (x
) == PLUS
)
17151 gcc_assert (GET_CODE (XEXP (x
, 0)) == REG
);
17152 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
17153 asm_fprintf (stream
, "[%r, #%wd]",
17154 REGNO (XEXP (x
, 0)),
17155 INTVAL (XEXP (x
, 1)));
17157 asm_fprintf (stream
, "[%r, %r]",
17158 REGNO (XEXP (x
, 0)),
17159 REGNO (XEXP (x
, 1)));
17162 output_addr_const (stream
, x
);
17166 /* Target hook for indicating whether a punctuation character for
17167 TARGET_PRINT_OPERAND is valid. */
17169 arm_print_operand_punct_valid_p (unsigned char code
)
17171 return (code
== '@' || code
== '|' || code
== '.'
17172 || code
== '(' || code
== ')' || code
== '#'
17173 || (TARGET_32BIT
&& (code
== '?'))
17174 || (TARGET_THUMB2
&& (code
== '!'))
17175 || (TARGET_THUMB
&& (code
== '_')));
17178 /* Target hook for assembling integer objects. The ARM version needs to
17179 handle word-sized values specially. */
17181 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
17183 enum machine_mode mode
;
17185 if (size
== UNITS_PER_WORD
&& aligned_p
)
17187 fputs ("\t.word\t", asm_out_file
);
17188 output_addr_const (asm_out_file
, x
);
17190 /* Mark symbols as position independent. We only do this in the
17191 .text segment, not in the .data segment. */
17192 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
17193 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
17195 /* See legitimize_pic_address for an explanation of the
17196 TARGET_VXWORKS_RTP check. */
17197 if (TARGET_VXWORKS_RTP
17198 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
17199 fputs ("(GOT)", asm_out_file
);
17201 fputs ("(GOTOFF)", asm_out_file
);
17203 fputc ('\n', asm_out_file
);
17207 mode
= GET_MODE (x
);
17209 if (arm_vector_mode_supported_p (mode
))
17213 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
17215 units
= CONST_VECTOR_NUNITS (x
);
17216 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
17218 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
17219 for (i
= 0; i
< units
; i
++)
17221 rtx elt
= CONST_VECTOR_ELT (x
, i
);
17223 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
17226 for (i
= 0; i
< units
; i
++)
17228 rtx elt
= CONST_VECTOR_ELT (x
, i
);
17229 REAL_VALUE_TYPE rval
;
17231 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
17234 (rval
, GET_MODE_INNER (mode
),
17235 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
17241 return default_assemble_integer (x
, size
, aligned_p
);
17245 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
17249 if (!TARGET_AAPCS_BASED
)
17252 default_named_section_asm_out_constructor
17253 : default_named_section_asm_out_destructor
) (symbol
, priority
);
17257 /* Put these in the .init_array section, using a special relocation. */
17258 if (priority
!= DEFAULT_INIT_PRIORITY
)
17261 sprintf (buf
, "%s.%.5u",
17262 is_ctor
? ".init_array" : ".fini_array",
17264 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
17271 switch_to_section (s
);
17272 assemble_align (POINTER_SIZE
);
17273 fputs ("\t.word\t", asm_out_file
);
17274 output_addr_const (asm_out_file
, symbol
);
17275 fputs ("(target1)\n", asm_out_file
);
17278 /* Add a function to the list of static constructors. */
17281 arm_elf_asm_constructor (rtx symbol
, int priority
)
17283 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
17286 /* Add a function to the list of static destructors. */
17289 arm_elf_asm_destructor (rtx symbol
, int priority
)
17291 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
17294 /* A finite state machine takes care of noticing whether or not instructions
17295 can be conditionally executed, and thus decrease execution time and code
17296 size by deleting branch instructions. The fsm is controlled by
17297 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
17299 /* The state of the fsm controlling condition codes are:
17300 0: normal, do nothing special
17301 1: make ASM_OUTPUT_OPCODE not output this instruction
17302 2: make ASM_OUTPUT_OPCODE not output this instruction
17303 3: make instructions conditional
17304 4: make instructions conditional
17306 State transitions (state->state by whom under condition):
17307 0 -> 1 final_prescan_insn if the `target' is a label
17308 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
17309 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
17310 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
17311 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
17312 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
17313 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
17314 (the target insn is arm_target_insn).
17316 If the jump clobbers the conditions then we use states 2 and 4.
17318 A similar thing can be done with conditional return insns.
17320 XXX In case the `target' is an unconditional branch, this conditionalising
17321 of the instructions always reduces code size, but not always execution
17322 time. But then, I want to reduce the code size to somewhere near what
17323 /bin/cc produces. */
17325 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
17326 instructions. When a COND_EXEC instruction is seen the subsequent
17327 instructions are scanned so that multiple conditional instructions can be
17328 combined into a single IT block. arm_condexec_count and arm_condexec_mask
17329 specify the length and true/false mask for the IT block. These will be
17330 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
17332 /* Returns the index of the ARM condition code string in
17333 `arm_condition_codes'. COMPARISON should be an rtx like
17334 `(eq (...) (...))'. */
17335 static enum arm_cond_code
17336 get_arm_condition_code (rtx comparison
)
17338 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
17339 enum arm_cond_code code
;
17340 enum rtx_code comp_code
= GET_CODE (comparison
);
17342 if (GET_MODE_CLASS (mode
) != MODE_CC
)
17343 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
17344 XEXP (comparison
, 1));
17348 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
17349 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
17350 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
17351 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
17352 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
17353 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
17354 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
17355 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
17356 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
17357 case CC_DLTUmode
: code
= ARM_CC
;
17360 gcc_assert (comp_code
== EQ
|| comp_code
== NE
);
17362 if (comp_code
== EQ
)
17363 return ARM_INVERSE_CONDITION_CODE (code
);
17369 case NE
: return ARM_NE
;
17370 case EQ
: return ARM_EQ
;
17371 case GE
: return ARM_PL
;
17372 case LT
: return ARM_MI
;
17373 default: gcc_unreachable ();
17379 case NE
: return ARM_NE
;
17380 case EQ
: return ARM_EQ
;
17381 default: gcc_unreachable ();
17387 case NE
: return ARM_MI
;
17388 case EQ
: return ARM_PL
;
17389 default: gcc_unreachable ();
17394 /* These encodings assume that AC=1 in the FPA system control
17395 byte. This allows us to handle all cases except UNEQ and
17399 case GE
: return ARM_GE
;
17400 case GT
: return ARM_GT
;
17401 case LE
: return ARM_LS
;
17402 case LT
: return ARM_MI
;
17403 case NE
: return ARM_NE
;
17404 case EQ
: return ARM_EQ
;
17405 case ORDERED
: return ARM_VC
;
17406 case UNORDERED
: return ARM_VS
;
17407 case UNLT
: return ARM_LT
;
17408 case UNLE
: return ARM_LE
;
17409 case UNGT
: return ARM_HI
;
17410 case UNGE
: return ARM_PL
;
17411 /* UNEQ and LTGT do not have a representation. */
17412 case UNEQ
: /* Fall through. */
17413 case LTGT
: /* Fall through. */
17414 default: gcc_unreachable ();
17420 case NE
: return ARM_NE
;
17421 case EQ
: return ARM_EQ
;
17422 case GE
: return ARM_LE
;
17423 case GT
: return ARM_LT
;
17424 case LE
: return ARM_GE
;
17425 case LT
: return ARM_GT
;
17426 case GEU
: return ARM_LS
;
17427 case GTU
: return ARM_CC
;
17428 case LEU
: return ARM_CS
;
17429 case LTU
: return ARM_HI
;
17430 default: gcc_unreachable ();
17436 case LTU
: return ARM_CS
;
17437 case GEU
: return ARM_CC
;
17438 default: gcc_unreachable ();
17444 case NE
: return ARM_NE
;
17445 case EQ
: return ARM_EQ
;
17446 case GEU
: return ARM_CS
;
17447 case GTU
: return ARM_HI
;
17448 case LEU
: return ARM_LS
;
17449 case LTU
: return ARM_CC
;
17450 default: gcc_unreachable ();
17456 case GE
: return ARM_GE
;
17457 case LT
: return ARM_LT
;
17458 case GEU
: return ARM_CS
;
17459 case LTU
: return ARM_CC
;
17460 default: gcc_unreachable ();
17466 case NE
: return ARM_NE
;
17467 case EQ
: return ARM_EQ
;
17468 case GE
: return ARM_GE
;
17469 case GT
: return ARM_GT
;
17470 case LE
: return ARM_LE
;
17471 case LT
: return ARM_LT
;
17472 case GEU
: return ARM_CS
;
17473 case GTU
: return ARM_HI
;
17474 case LEU
: return ARM_LS
;
17475 case LTU
: return ARM_CC
;
17476 default: gcc_unreachable ();
17479 default: gcc_unreachable ();
17483 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
17486 thumb2_final_prescan_insn (rtx insn
)
17488 rtx first_insn
= insn
;
17489 rtx body
= PATTERN (insn
);
17491 enum arm_cond_code code
;
17495 /* Remove the previous insn from the count of insns to be output. */
17496 if (arm_condexec_count
)
17497 arm_condexec_count
--;
17499 /* Nothing to do if we are already inside a conditional block. */
17500 if (arm_condexec_count
)
17503 if (GET_CODE (body
) != COND_EXEC
)
17506 /* Conditional jumps are implemented directly. */
17507 if (GET_CODE (insn
) == JUMP_INSN
)
17510 predicate
= COND_EXEC_TEST (body
);
17511 arm_current_cc
= get_arm_condition_code (predicate
);
17513 n
= get_attr_ce_count (insn
);
17514 arm_condexec_count
= 1;
17515 arm_condexec_mask
= (1 << n
) - 1;
17516 arm_condexec_masklen
= n
;
17517 /* See if subsequent instructions can be combined into the same block. */
17520 insn
= next_nonnote_insn (insn
);
17522 /* Jumping into the middle of an IT block is illegal, so a label or
17523 barrier terminates the block. */
17524 if (GET_CODE (insn
) != INSN
&& GET_CODE(insn
) != JUMP_INSN
)
17527 body
= PATTERN (insn
);
17528 /* USE and CLOBBER aren't really insns, so just skip them. */
17529 if (GET_CODE (body
) == USE
17530 || GET_CODE (body
) == CLOBBER
)
17533 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
17534 if (GET_CODE (body
) != COND_EXEC
)
17536 /* Allow up to 4 conditionally executed instructions in a block. */
17537 n
= get_attr_ce_count (insn
);
17538 if (arm_condexec_masklen
+ n
> 4)
17541 predicate
= COND_EXEC_TEST (body
);
17542 code
= get_arm_condition_code (predicate
);
17543 mask
= (1 << n
) - 1;
17544 if (arm_current_cc
== code
)
17545 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
17546 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
17549 arm_condexec_count
++;
17550 arm_condexec_masklen
+= n
;
17552 /* A jump must be the last instruction in a conditional block. */
17553 if (GET_CODE(insn
) == JUMP_INSN
)
17556 /* Restore recog_data (getting the attributes of other insns can
17557 destroy this array, but final.c assumes that it remains intact
17558 across this call). */
17559 extract_constrain_insn_cached (first_insn
);
17563 arm_final_prescan_insn (rtx insn
)
17565 /* BODY will hold the body of INSN. */
17566 rtx body
= PATTERN (insn
);
17568 /* This will be 1 if trying to repeat the trick, and things need to be
17569 reversed if it appears to fail. */
17572 /* If we start with a return insn, we only succeed if we find another one. */
17573 int seeking_return
= 0;
17575 /* START_INSN will hold the insn from where we start looking. This is the
17576 first insn after the following code_label if REVERSE is true. */
17577 rtx start_insn
= insn
;
17579 /* If in state 4, check if the target branch is reached, in order to
17580 change back to state 0. */
17581 if (arm_ccfsm_state
== 4)
17583 if (insn
== arm_target_insn
)
17585 arm_target_insn
= NULL
;
17586 arm_ccfsm_state
= 0;
17591 /* If in state 3, it is possible to repeat the trick, if this insn is an
17592 unconditional branch to a label, and immediately following this branch
17593 is the previous target label which is only used once, and the label this
17594 branch jumps to is not too far off. */
17595 if (arm_ccfsm_state
== 3)
17597 if (simplejump_p (insn
))
17599 start_insn
= next_nonnote_insn (start_insn
);
17600 if (GET_CODE (start_insn
) == BARRIER
)
17602 /* XXX Isn't this always a barrier? */
17603 start_insn
= next_nonnote_insn (start_insn
);
17605 if (GET_CODE (start_insn
) == CODE_LABEL
17606 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17607 && LABEL_NUSES (start_insn
) == 1)
17612 else if (GET_CODE (body
) == RETURN
)
17614 start_insn
= next_nonnote_insn (start_insn
);
17615 if (GET_CODE (start_insn
) == BARRIER
)
17616 start_insn
= next_nonnote_insn (start_insn
);
17617 if (GET_CODE (start_insn
) == CODE_LABEL
17618 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
17619 && LABEL_NUSES (start_insn
) == 1)
17622 seeking_return
= 1;
17631 gcc_assert (!arm_ccfsm_state
|| reverse
);
17632 if (GET_CODE (insn
) != JUMP_INSN
)
17635 /* This jump might be paralleled with a clobber of the condition codes
17636 the jump should always come first */
17637 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
17638 body
= XVECEXP (body
, 0, 0);
17641 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
17642 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
17645 int fail
= FALSE
, succeed
= FALSE
;
17646 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
17647 int then_not_else
= TRUE
;
17648 rtx this_insn
= start_insn
, label
= 0;
17650 /* Register the insn jumped to. */
17653 if (!seeking_return
)
17654 label
= XEXP (SET_SRC (body
), 0);
17656 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
17657 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
17658 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
17660 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
17661 then_not_else
= FALSE
;
17663 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == RETURN
)
17664 seeking_return
= 1;
17665 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == RETURN
)
17667 seeking_return
= 1;
17668 then_not_else
= FALSE
;
17671 gcc_unreachable ();
17673 /* See how many insns this branch skips, and what kind of insns. If all
17674 insns are okay, and the label or unconditional branch to the same
17675 label is not too far away, succeed. */
17676 for (insns_skipped
= 0;
17677 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
17681 this_insn
= next_nonnote_insn (this_insn
);
17685 switch (GET_CODE (this_insn
))
17688 /* Succeed if it is the target label, otherwise fail since
17689 control falls in from somewhere else. */
17690 if (this_insn
== label
)
17692 arm_ccfsm_state
= 1;
17700 /* Succeed if the following insn is the target label.
17702 If return insns are used then the last insn in a function
17703 will be a barrier. */
17704 this_insn
= next_nonnote_insn (this_insn
);
17705 if (this_insn
&& this_insn
== label
)
17707 arm_ccfsm_state
= 1;
17715 /* The AAPCS says that conditional calls should not be
17716 used since they make interworking inefficient (the
17717 linker can't transform BL<cond> into BLX). That's
17718 only a problem if the machine has BLX. */
17725 /* Succeed if the following insn is the target label, or
17726 if the following two insns are a barrier and the
17728 this_insn
= next_nonnote_insn (this_insn
);
17729 if (this_insn
&& GET_CODE (this_insn
) == BARRIER
)
17730 this_insn
= next_nonnote_insn (this_insn
);
17732 if (this_insn
&& this_insn
== label
17733 && insns_skipped
< max_insns_skipped
)
17735 arm_ccfsm_state
= 1;
17743 /* If this is an unconditional branch to the same label, succeed.
17744 If it is to another label, do nothing. If it is conditional,
17746 /* XXX Probably, the tests for SET and the PC are
17749 scanbody
= PATTERN (this_insn
);
17750 if (GET_CODE (scanbody
) == SET
17751 && GET_CODE (SET_DEST (scanbody
)) == PC
)
17753 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
17754 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
17756 arm_ccfsm_state
= 2;
17759 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
17762 /* Fail if a conditional return is undesirable (e.g. on a
17763 StrongARM), but still allow this if optimizing for size. */
17764 else if (GET_CODE (scanbody
) == RETURN
17765 && !use_return_insn (TRUE
, NULL
)
17768 else if (GET_CODE (scanbody
) == RETURN
17771 arm_ccfsm_state
= 2;
17774 else if (GET_CODE (scanbody
) == PARALLEL
)
17776 switch (get_attr_conds (this_insn
))
17786 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
17791 /* Instructions using or affecting the condition codes make it
17793 scanbody
= PATTERN (this_insn
);
17794 if (!(GET_CODE (scanbody
) == SET
17795 || GET_CODE (scanbody
) == PARALLEL
)
17796 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
17799 /* A conditional cirrus instruction must be followed by
17800 a non Cirrus instruction. However, since we
17801 conditionalize instructions in this function and by
17802 the time we get here we can't add instructions
17803 (nops), because shorten_branches() has already been
17804 called, we will disable conditionalizing Cirrus
17805 instructions to be safe. */
17806 if (GET_CODE (scanbody
) != USE
17807 && GET_CODE (scanbody
) != CLOBBER
17808 && get_attr_cirrus (this_insn
) != CIRRUS_NOT
)
17818 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
17819 arm_target_label
= CODE_LABEL_NUMBER (label
);
17822 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
17824 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
17826 this_insn
= next_nonnote_insn (this_insn
);
17827 gcc_assert (!this_insn
17828 || (GET_CODE (this_insn
) != BARRIER
17829 && GET_CODE (this_insn
) != CODE_LABEL
));
17833 /* Oh, dear! we ran off the end.. give up. */
17834 extract_constrain_insn_cached (insn
);
17835 arm_ccfsm_state
= 0;
17836 arm_target_insn
= NULL
;
17839 arm_target_insn
= this_insn
;
17842 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
17845 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
), 0));
17847 if (reverse
|| then_not_else
)
17848 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
17851 /* Restore recog_data (getting the attributes of other insns can
17852 destroy this array, but final.c assumes that it remains intact
17853 across this call. */
17854 extract_constrain_insn_cached (insn
);
17858 /* Output IT instructions. */
17860 thumb2_asm_output_opcode (FILE * stream
)
17865 if (arm_condexec_mask
)
17867 for (n
= 0; n
< arm_condexec_masklen
; n
++)
17868 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
17870 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
17871 arm_condition_codes
[arm_current_cc
]);
17872 arm_condexec_mask
= 0;
17876 /* Returns true if REGNO is a valid register
17877 for holding a quantity of type MODE. */
17879 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
17881 if (GET_MODE_CLASS (mode
) == MODE_CC
)
17882 return (regno
== CC_REGNUM
17883 || (TARGET_HARD_FLOAT
&& TARGET_VFP
17884 && regno
== VFPCC_REGNUM
));
17887 /* For the Thumb we only allow values bigger than SImode in
17888 registers 0 - 6, so that there is always a second low
17889 register available to hold the upper part of the value.
17890 We probably we ought to ensure that the register is the
17891 start of an even numbered register pair. */
17892 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
17894 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
17895 && IS_CIRRUS_REGNUM (regno
))
17896 /* We have outlawed SI values in Cirrus registers because they
17897 reside in the lower 32 bits, but SF values reside in the
17898 upper 32 bits. This causes gcc all sorts of grief. We can't
17899 even split the registers into pairs because Cirrus SI values
17900 get sign extended to 64bits-- aldyh. */
17901 return (GET_MODE_CLASS (mode
) == MODE_FLOAT
) || (mode
== DImode
);
17903 if (TARGET_HARD_FLOAT
&& TARGET_VFP
17904 && IS_VFP_REGNUM (regno
))
17906 if (mode
== SFmode
|| mode
== SImode
)
17907 return VFP_REGNO_OK_FOR_SINGLE (regno
);
17909 if (mode
== DFmode
)
17910 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
17912 /* VFP registers can hold HFmode values, but there is no point in
17913 putting them there unless we have hardware conversion insns. */
17914 if (mode
== HFmode
)
17915 return TARGET_FP16
&& VFP_REGNO_OK_FOR_SINGLE (regno
);
17918 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
17919 || (VALID_NEON_QREG_MODE (mode
)
17920 && NEON_REGNO_OK_FOR_QUAD (regno
))
17921 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
17922 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
17923 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
17924 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
17925 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
17930 if (TARGET_REALLY_IWMMXT
)
17932 if (IS_IWMMXT_GR_REGNUM (regno
))
17933 return mode
== SImode
;
17935 if (IS_IWMMXT_REGNUM (regno
))
17936 return VALID_IWMMXT_REG_MODE (mode
);
17939 /* We allow almost any value to be stored in the general registers.
17940 Restrict doubleword quantities to even register pairs so that we can
17941 use ldrd. Do not allow very large Neon structure opaque modes in
17942 general registers; they would use too many. */
17943 if (regno
<= LAST_ARM_REGNUM
)
17944 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
17945 && ARM_NUM_REGS (mode
) <= 4;
17947 if (regno
== FRAME_POINTER_REGNUM
17948 || regno
== ARG_POINTER_REGNUM
)
17949 /* We only allow integers in the fake hard registers. */
17950 return GET_MODE_CLASS (mode
) == MODE_INT
;
17952 /* The only registers left are the FPA registers
17953 which we only allow to hold FP values. */
17954 return (TARGET_HARD_FLOAT
&& TARGET_FPA
17955 && GET_MODE_CLASS (mode
) == MODE_FLOAT
17956 && regno
>= FIRST_FPA_REGNUM
17957 && regno
<= LAST_FPA_REGNUM
);
17960 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
17961 not used in arm mode. */
17964 arm_regno_class (int regno
)
17968 if (regno
== STACK_POINTER_REGNUM
)
17970 if (regno
== CC_REGNUM
)
17977 if (TARGET_THUMB2
&& regno
< 8)
17980 if ( regno
<= LAST_ARM_REGNUM
17981 || regno
== FRAME_POINTER_REGNUM
17982 || regno
== ARG_POINTER_REGNUM
)
17983 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
17985 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
17986 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
17988 if (IS_CIRRUS_REGNUM (regno
))
17989 return CIRRUS_REGS
;
17991 if (IS_VFP_REGNUM (regno
))
17993 if (regno
<= D7_VFP_REGNUM
)
17994 return VFP_D0_D7_REGS
;
17995 else if (regno
<= LAST_LO_VFP_REGNUM
)
17996 return VFP_LO_REGS
;
17998 return VFP_HI_REGS
;
18001 if (IS_IWMMXT_REGNUM (regno
))
18002 return IWMMXT_REGS
;
18004 if (IS_IWMMXT_GR_REGNUM (regno
))
18005 return IWMMXT_GR_REGS
;
18010 /* Handle a special case when computing the offset
18011 of an argument from the frame pointer. */
18013 arm_debugger_arg_offset (int value
, rtx addr
)
18017 /* We are only interested if dbxout_parms() failed to compute the offset. */
18021 /* We can only cope with the case where the address is held in a register. */
18022 if (GET_CODE (addr
) != REG
)
18025 /* If we are using the frame pointer to point at the argument, then
18026 an offset of 0 is correct. */
18027 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
18030 /* If we are using the stack pointer to point at the
18031 argument, then an offset of 0 is correct. */
18032 /* ??? Check this is consistent with thumb2 frame layout. */
18033 if ((TARGET_THUMB
|| !frame_pointer_needed
)
18034 && REGNO (addr
) == SP_REGNUM
)
18037 /* Oh dear. The argument is pointed to by a register rather
18038 than being held in a register, or being stored at a known
18039 offset from the frame pointer. Since GDB only understands
18040 those two kinds of argument we must translate the address
18041 held in the register into an offset from the frame pointer.
18042 We do this by searching through the insns for the function
18043 looking to see where this register gets its value. If the
18044 register is initialized from the frame pointer plus an offset
18045 then we are in luck and we can continue, otherwise we give up.
18047 This code is exercised by producing debugging information
18048 for a function with arguments like this:
18050 double func (double a, double b, int c, double d) {return d;}
18052 Without this code the stab for parameter 'd' will be set to
18053 an offset of 0 from the frame pointer, rather than 8. */
18055 /* The if() statement says:
18057 If the insn is a normal instruction
18058 and if the insn is setting the value in a register
18059 and if the register being set is the register holding the address of the argument
18060 and if the address is computing by an addition
18061 that involves adding to a register
18062 which is the frame pointer
18067 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
18069 if ( GET_CODE (insn
) == INSN
18070 && GET_CODE (PATTERN (insn
)) == SET
18071 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
18072 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
18073 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 0)) == REG
18074 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
18075 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 1)) == CONST_INT
18078 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
18087 warning (0, "unable to compute real location of stacked parameter");
18088 value
= 8; /* XXX magic hack */
18108 T_MAX
/* Size of enum. Keep last. */
18109 } neon_builtin_type_mode
;
18111 #define TYPE_MODE_BIT(X) (1 << (X))
18113 #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
18114 | TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
18115 | TYPE_MODE_BIT (T_DI))
18116 #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
18117 | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
18118 | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
18120 #define v8qi_UP T_V8QI
18121 #define v4hi_UP T_V4HI
18122 #define v2si_UP T_V2SI
18123 #define v2sf_UP T_V2SF
18125 #define v16qi_UP T_V16QI
18126 #define v8hi_UP T_V8HI
18127 #define v4si_UP T_V4SI
18128 #define v4sf_UP T_V4SF
18129 #define v2di_UP T_V2DI
18134 #define UP(X) X##_UP
18167 NEON_LOADSTRUCTLANE
,
18169 NEON_STORESTRUCTLANE
,
18178 const neon_itype itype
;
18179 const neon_builtin_type_mode mode
;
18180 const enum insn_code code
;
18181 unsigned int fcode
;
18182 } neon_builtin_datum
;
18184 #define CF(N,X) CODE_FOR_neon_##N##X
18186 #define VAR1(T, N, A) \
18187 {#N, NEON_##T, UP (A), CF (N, A), 0}
18188 #define VAR2(T, N, A, B) \
18190 {#N, NEON_##T, UP (B), CF (N, B), 0}
18191 #define VAR3(T, N, A, B, C) \
18192 VAR2 (T, N, A, B), \
18193 {#N, NEON_##T, UP (C), CF (N, C), 0}
18194 #define VAR4(T, N, A, B, C, D) \
18195 VAR3 (T, N, A, B, C), \
18196 {#N, NEON_##T, UP (D), CF (N, D), 0}
18197 #define VAR5(T, N, A, B, C, D, E) \
18198 VAR4 (T, N, A, B, C, D), \
18199 {#N, NEON_##T, UP (E), CF (N, E), 0}
18200 #define VAR6(T, N, A, B, C, D, E, F) \
18201 VAR5 (T, N, A, B, C, D, E), \
18202 {#N, NEON_##T, UP (F), CF (N, F), 0}
18203 #define VAR7(T, N, A, B, C, D, E, F, G) \
18204 VAR6 (T, N, A, B, C, D, E, F), \
18205 {#N, NEON_##T, UP (G), CF (N, G), 0}
18206 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
18207 VAR7 (T, N, A, B, C, D, E, F, G), \
18208 {#N, NEON_##T, UP (H), CF (N, H), 0}
18209 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
18210 VAR8 (T, N, A, B, C, D, E, F, G, H), \
18211 {#N, NEON_##T, UP (I), CF (N, I), 0}
18212 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
18213 VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
18214 {#N, NEON_##T, UP (J), CF (N, J), 0}
18216 /* The mode entries in the following table correspond to the "key" type of the
18217 instruction variant, i.e. equivalent to that which would be specified after
18218 the assembler mnemonic, which usually refers to the last vector operand.
18219 (Signed/unsigned/polynomial types are not differentiated between though, and
18220 are all mapped onto the same mode for a given element size.) The modes
18221 listed per instruction should be the same as those defined for that
18222 instruction's pattern in neon.md. */
18224 static neon_builtin_datum neon_builtin_data
[] =
18226 VAR10 (BINOP
, vadd
,
18227 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18228 VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
),
18229 VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
),
18230 VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18231 VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18232 VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
),
18233 VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18234 VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18235 VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
),
18236 VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18237 VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
),
18238 VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
),
18239 VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
),
18240 VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
),
18241 VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
),
18242 VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
),
18243 VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
),
18244 VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
),
18245 VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
),
18246 VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
),
18247 VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
),
18248 VAR2 (BINOP
, vqdmull
, v4hi
, v2si
),
18249 VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18250 VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18251 VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18252 VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
),
18253 VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
),
18254 VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
),
18255 VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18256 VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18257 VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18258 VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
),
18259 VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18260 VAR10 (BINOP
, vsub
,
18261 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18262 VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
),
18263 VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
),
18264 VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18265 VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18266 VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
),
18267 VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18268 VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18269 VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18270 VAR2 (BINOP
, vcage
, v2sf
, v4sf
),
18271 VAR2 (BINOP
, vcagt
, v2sf
, v4sf
),
18272 VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18273 VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18274 VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
),
18275 VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18276 VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
),
18277 VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18278 VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18279 VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
),
18280 VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18281 VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18282 VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
),
18283 VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
),
18284 VAR2 (BINOP
, vrecps
, v2sf
, v4sf
),
18285 VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
),
18286 VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18287 VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
),
18288 VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18289 VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18290 VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18291 VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18292 VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18293 VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18294 VAR2 (UNOP
, vcnt
, v8qi
, v16qi
),
18295 VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
),
18296 VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
),
18297 VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
),
18298 /* FIXME: vget_lane supports more variants than this! */
18299 VAR10 (GETLANE
, vget_lane
,
18300 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18301 VAR10 (SETLANE
, vset_lane
,
18302 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18303 VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18304 VAR10 (DUP
, vdup_n
,
18305 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18306 VAR10 (DUPLANE
, vdup_lane
,
18307 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18308 VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18309 VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18310 VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18311 VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
),
18312 VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
),
18313 VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
),
18314 VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
),
18315 VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18316 VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18317 VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
),
18318 VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
),
18319 VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18320 VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
),
18321 VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
),
18322 VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18323 VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18324 VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
),
18325 VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
),
18326 VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18327 VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
),
18328 VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
),
18329 VAR10 (BINOP
, vext
,
18330 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18331 VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18332 VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
),
18333 VAR2 (UNOP
, vrev16
, v8qi
, v16qi
),
18334 VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
),
18335 VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
),
18336 VAR10 (SELECT
, vbsl
,
18337 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18338 VAR1 (VTBL
, vtbl1
, v8qi
),
18339 VAR1 (VTBL
, vtbl2
, v8qi
),
18340 VAR1 (VTBL
, vtbl3
, v8qi
),
18341 VAR1 (VTBL
, vtbl4
, v8qi
),
18342 VAR1 (VTBX
, vtbx1
, v8qi
),
18343 VAR1 (VTBX
, vtbx2
, v8qi
),
18344 VAR1 (VTBX
, vtbx3
, v8qi
),
18345 VAR1 (VTBX
, vtbx4
, v8qi
),
18346 VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18347 VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18348 VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
),
18349 VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18350 VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18351 VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18352 VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18353 VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18354 VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18355 VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18356 VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18357 VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18358 VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18359 VAR10 (LOAD1
, vld1
,
18360 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18361 VAR10 (LOAD1LANE
, vld1_lane
,
18362 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18363 VAR10 (LOAD1
, vld1_dup
,
18364 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18365 VAR10 (STORE1
, vst1
,
18366 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18367 VAR10 (STORE1LANE
, vst1_lane
,
18368 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18370 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18371 VAR7 (LOADSTRUCTLANE
, vld2_lane
,
18372 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18373 VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18374 VAR9 (STORESTRUCT
, vst2
,
18375 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18376 VAR7 (STORESTRUCTLANE
, vst2_lane
,
18377 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18379 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18380 VAR7 (LOADSTRUCTLANE
, vld3_lane
,
18381 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18382 VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18383 VAR9 (STORESTRUCT
, vst3
,
18384 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18385 VAR7 (STORESTRUCTLANE
, vst3_lane
,
18386 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18387 VAR9 (LOADSTRUCT
, vld4
,
18388 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18389 VAR7 (LOADSTRUCTLANE
, vld4_lane
,
18390 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18391 VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
),
18392 VAR9 (STORESTRUCT
, vst4
,
18393 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
),
18394 VAR7 (STORESTRUCTLANE
, vst4_lane
,
18395 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
),
18396 VAR10 (LOGICBINOP
, vand
,
18397 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18398 VAR10 (LOGICBINOP
, vorr
,
18399 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18400 VAR10 (BINOP
, veor
,
18401 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18402 VAR10 (LOGICBINOP
, vbic
,
18403 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
),
18404 VAR10 (LOGICBINOP
, vorn
,
18405 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
)
18420 /* Neon defines builtins from ARM_BUILTIN_MAX upwards, though they don't have
18421 symbolic names defined here (which would require too much duplication).
18425 ARM_BUILTIN_GETWCX
,
18426 ARM_BUILTIN_SETWCX
,
18430 ARM_BUILTIN_WAVG2BR
,
18431 ARM_BUILTIN_WAVG2HR
,
18432 ARM_BUILTIN_WAVG2B
,
18433 ARM_BUILTIN_WAVG2H
,
18440 ARM_BUILTIN_WMACSZ
,
18442 ARM_BUILTIN_WMACUZ
,
18445 ARM_BUILTIN_WSADBZ
,
18447 ARM_BUILTIN_WSADHZ
,
18449 ARM_BUILTIN_WALIGN
,
18452 ARM_BUILTIN_TMIAPH
,
18453 ARM_BUILTIN_TMIABB
,
18454 ARM_BUILTIN_TMIABT
,
18455 ARM_BUILTIN_TMIATB
,
18456 ARM_BUILTIN_TMIATT
,
18458 ARM_BUILTIN_TMOVMSKB
,
18459 ARM_BUILTIN_TMOVMSKH
,
18460 ARM_BUILTIN_TMOVMSKW
,
18462 ARM_BUILTIN_TBCSTB
,
18463 ARM_BUILTIN_TBCSTH
,
18464 ARM_BUILTIN_TBCSTW
,
18466 ARM_BUILTIN_WMADDS
,
18467 ARM_BUILTIN_WMADDU
,
18469 ARM_BUILTIN_WPACKHSS
,
18470 ARM_BUILTIN_WPACKWSS
,
18471 ARM_BUILTIN_WPACKDSS
,
18472 ARM_BUILTIN_WPACKHUS
,
18473 ARM_BUILTIN_WPACKWUS
,
18474 ARM_BUILTIN_WPACKDUS
,
18479 ARM_BUILTIN_WADDSSB
,
18480 ARM_BUILTIN_WADDSSH
,
18481 ARM_BUILTIN_WADDSSW
,
18482 ARM_BUILTIN_WADDUSB
,
18483 ARM_BUILTIN_WADDUSH
,
18484 ARM_BUILTIN_WADDUSW
,
18488 ARM_BUILTIN_WSUBSSB
,
18489 ARM_BUILTIN_WSUBSSH
,
18490 ARM_BUILTIN_WSUBSSW
,
18491 ARM_BUILTIN_WSUBUSB
,
18492 ARM_BUILTIN_WSUBUSH
,
18493 ARM_BUILTIN_WSUBUSW
,
18500 ARM_BUILTIN_WCMPEQB
,
18501 ARM_BUILTIN_WCMPEQH
,
18502 ARM_BUILTIN_WCMPEQW
,
18503 ARM_BUILTIN_WCMPGTUB
,
18504 ARM_BUILTIN_WCMPGTUH
,
18505 ARM_BUILTIN_WCMPGTUW
,
18506 ARM_BUILTIN_WCMPGTSB
,
18507 ARM_BUILTIN_WCMPGTSH
,
18508 ARM_BUILTIN_WCMPGTSW
,
18510 ARM_BUILTIN_TEXTRMSB
,
18511 ARM_BUILTIN_TEXTRMSH
,
18512 ARM_BUILTIN_TEXTRMSW
,
18513 ARM_BUILTIN_TEXTRMUB
,
18514 ARM_BUILTIN_TEXTRMUH
,
18515 ARM_BUILTIN_TEXTRMUW
,
18516 ARM_BUILTIN_TINSRB
,
18517 ARM_BUILTIN_TINSRH
,
18518 ARM_BUILTIN_TINSRW
,
18520 ARM_BUILTIN_WMAXSW
,
18521 ARM_BUILTIN_WMAXSH
,
18522 ARM_BUILTIN_WMAXSB
,
18523 ARM_BUILTIN_WMAXUW
,
18524 ARM_BUILTIN_WMAXUH
,
18525 ARM_BUILTIN_WMAXUB
,
18526 ARM_BUILTIN_WMINSW
,
18527 ARM_BUILTIN_WMINSH
,
18528 ARM_BUILTIN_WMINSB
,
18529 ARM_BUILTIN_WMINUW
,
18530 ARM_BUILTIN_WMINUH
,
18531 ARM_BUILTIN_WMINUB
,
18533 ARM_BUILTIN_WMULUM
,
18534 ARM_BUILTIN_WMULSM
,
18535 ARM_BUILTIN_WMULUL
,
18537 ARM_BUILTIN_PSADBH
,
18538 ARM_BUILTIN_WSHUFH
,
18552 ARM_BUILTIN_WSLLHI
,
18553 ARM_BUILTIN_WSLLWI
,
18554 ARM_BUILTIN_WSLLDI
,
18555 ARM_BUILTIN_WSRAHI
,
18556 ARM_BUILTIN_WSRAWI
,
18557 ARM_BUILTIN_WSRADI
,
18558 ARM_BUILTIN_WSRLHI
,
18559 ARM_BUILTIN_WSRLWI
,
18560 ARM_BUILTIN_WSRLDI
,
18561 ARM_BUILTIN_WRORHI
,
18562 ARM_BUILTIN_WRORWI
,
18563 ARM_BUILTIN_WRORDI
,
18565 ARM_BUILTIN_WUNPCKIHB
,
18566 ARM_BUILTIN_WUNPCKIHH
,
18567 ARM_BUILTIN_WUNPCKIHW
,
18568 ARM_BUILTIN_WUNPCKILB
,
18569 ARM_BUILTIN_WUNPCKILH
,
18570 ARM_BUILTIN_WUNPCKILW
,
18572 ARM_BUILTIN_WUNPCKEHSB
,
18573 ARM_BUILTIN_WUNPCKEHSH
,
18574 ARM_BUILTIN_WUNPCKEHSW
,
18575 ARM_BUILTIN_WUNPCKEHUB
,
18576 ARM_BUILTIN_WUNPCKEHUH
,
18577 ARM_BUILTIN_WUNPCKEHUW
,
18578 ARM_BUILTIN_WUNPCKELSB
,
18579 ARM_BUILTIN_WUNPCKELSH
,
18580 ARM_BUILTIN_WUNPCKELSW
,
18581 ARM_BUILTIN_WUNPCKELUB
,
18582 ARM_BUILTIN_WUNPCKELUH
,
18583 ARM_BUILTIN_WUNPCKELUW
,
18585 ARM_BUILTIN_THREAD_POINTER
,
18587 ARM_BUILTIN_NEON_BASE
,
18589 ARM_BUILTIN_MAX
= ARM_BUILTIN_NEON_BASE
+ ARRAY_SIZE (neon_builtin_data
)
/* Table of declarations for all ARM builtins, indexed by the builtin's
   enum code (including the Neon builtins defined from
   ARM_BUILTIN_NEON_BASE upwards).  GTY(()) roots the array for the
   garbage collector; entries are filled in as builtins are registered
   via add_builtin_function.  */
18592 static GTY(()) tree arm_builtin_decls
[ARM_BUILTIN_MAX
];
18595 arm_init_neon_builtins (void)
18597 unsigned int i
, fcode
;
18600 tree neon_intQI_type_node
;
18601 tree neon_intHI_type_node
;
18602 tree neon_polyQI_type_node
;
18603 tree neon_polyHI_type_node
;
18604 tree neon_intSI_type_node
;
18605 tree neon_intDI_type_node
;
18606 tree neon_float_type_node
;
18608 tree intQI_pointer_node
;
18609 tree intHI_pointer_node
;
18610 tree intSI_pointer_node
;
18611 tree intDI_pointer_node
;
18612 tree float_pointer_node
;
18614 tree const_intQI_node
;
18615 tree const_intHI_node
;
18616 tree const_intSI_node
;
18617 tree const_intDI_node
;
18618 tree const_float_node
;
18620 tree const_intQI_pointer_node
;
18621 tree const_intHI_pointer_node
;
18622 tree const_intSI_pointer_node
;
18623 tree const_intDI_pointer_node
;
18624 tree const_float_pointer_node
;
18626 tree V8QI_type_node
;
18627 tree V4HI_type_node
;
18628 tree V2SI_type_node
;
18629 tree V2SF_type_node
;
18630 tree V16QI_type_node
;
18631 tree V8HI_type_node
;
18632 tree V4SI_type_node
;
18633 tree V4SF_type_node
;
18634 tree V2DI_type_node
;
18636 tree intUQI_type_node
;
18637 tree intUHI_type_node
;
18638 tree intUSI_type_node
;
18639 tree intUDI_type_node
;
18641 tree intEI_type_node
;
18642 tree intOI_type_node
;
18643 tree intCI_type_node
;
18644 tree intXI_type_node
;
18646 tree V8QI_pointer_node
;
18647 tree V4HI_pointer_node
;
18648 tree V2SI_pointer_node
;
18649 tree V2SF_pointer_node
;
18650 tree V16QI_pointer_node
;
18651 tree V8HI_pointer_node
;
18652 tree V4SI_pointer_node
;
18653 tree V4SF_pointer_node
;
18654 tree V2DI_pointer_node
;
18656 tree void_ftype_pv8qi_v8qi_v8qi
;
18657 tree void_ftype_pv4hi_v4hi_v4hi
;
18658 tree void_ftype_pv2si_v2si_v2si
;
18659 tree void_ftype_pv2sf_v2sf_v2sf
;
18660 tree void_ftype_pdi_di_di
;
18661 tree void_ftype_pv16qi_v16qi_v16qi
;
18662 tree void_ftype_pv8hi_v8hi_v8hi
;
18663 tree void_ftype_pv4si_v4si_v4si
;
18664 tree void_ftype_pv4sf_v4sf_v4sf
;
18665 tree void_ftype_pv2di_v2di_v2di
;
18667 tree reinterp_ftype_dreg
[5][5];
18668 tree reinterp_ftype_qreg
[5][5];
18669 tree dreg_types
[5], qreg_types
[5];
18671 /* Create distinguished type nodes for NEON vector element types,
18672 and pointers to values of such types, so we can detect them later. */
18673 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18674 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18675 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
18676 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
18677 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
18678 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
18679 neon_float_type_node
= make_node (REAL_TYPE
);
18680 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
18681 layout_type (neon_float_type_node
);
18683 /* Define typedefs which exactly correspond to the modes we are basing vector
18684 types on. If you change these names you'll need to change
18685 the table used by arm_mangle_type too. */
18686 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
18687 "__builtin_neon_qi");
18688 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
18689 "__builtin_neon_hi");
18690 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
18691 "__builtin_neon_si");
18692 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
18693 "__builtin_neon_sf");
18694 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
18695 "__builtin_neon_di");
18696 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
18697 "__builtin_neon_poly8");
18698 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
18699 "__builtin_neon_poly16");
18701 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
18702 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
18703 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
18704 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
18705 float_pointer_node
= build_pointer_type (neon_float_type_node
);
18707 /* Next create constant-qualified versions of the above types. */
18708 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
18710 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
18712 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
18714 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
18716 const_float_node
= build_qualified_type (neon_float_type_node
,
18719 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
18720 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
18721 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
18722 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
18723 const_float_pointer_node
= build_pointer_type (const_float_node
);
18725 /* Now create vector types based on our NEON element types. */
18726 /* 64-bit vectors. */
18728 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
18730 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
18732 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
18734 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
18735 /* 128-bit vectors. */
18737 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
18739 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
18741 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
18743 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
18745 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
18747 /* Unsigned integer types for various mode sizes. */
18748 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
18749 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
18750 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
18751 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
18753 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
18754 "__builtin_neon_uqi");
18755 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
18756 "__builtin_neon_uhi");
18757 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
18758 "__builtin_neon_usi");
18759 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
18760 "__builtin_neon_udi");
18762 /* Opaque integer types for structures of vectors. */
18763 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
18764 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
18765 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
18766 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
18768 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
18769 "__builtin_neon_ti");
18770 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
18771 "__builtin_neon_ei");
18772 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
18773 "__builtin_neon_oi");
18774 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
18775 "__builtin_neon_ci");
18776 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
18777 "__builtin_neon_xi");
18779 /* Pointers to vector types. */
18780 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
18781 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
18782 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
18783 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
18784 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
18785 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
18786 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
18787 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
18788 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
18790 /* Operations which return results as pairs. */
18791 void_ftype_pv8qi_v8qi_v8qi
=
18792 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
18793 V8QI_type_node
, NULL
);
18794 void_ftype_pv4hi_v4hi_v4hi
=
18795 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
18796 V4HI_type_node
, NULL
);
18797 void_ftype_pv2si_v2si_v2si
=
18798 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
18799 V2SI_type_node
, NULL
);
18800 void_ftype_pv2sf_v2sf_v2sf
=
18801 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
18802 V2SF_type_node
, NULL
);
18803 void_ftype_pdi_di_di
=
18804 build_function_type_list (void_type_node
, intDI_pointer_node
,
18805 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
18806 void_ftype_pv16qi_v16qi_v16qi
=
18807 build_function_type_list (void_type_node
, V16QI_pointer_node
,
18808 V16QI_type_node
, V16QI_type_node
, NULL
);
18809 void_ftype_pv8hi_v8hi_v8hi
=
18810 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
18811 V8HI_type_node
, NULL
);
18812 void_ftype_pv4si_v4si_v4si
=
18813 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
18814 V4SI_type_node
, NULL
);
18815 void_ftype_pv4sf_v4sf_v4sf
=
18816 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
18817 V4SF_type_node
, NULL
);
18818 void_ftype_pv2di_v2di_v2di
=
18819 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
18820 V2DI_type_node
, NULL
);
18822 dreg_types
[0] = V8QI_type_node
;
18823 dreg_types
[1] = V4HI_type_node
;
18824 dreg_types
[2] = V2SI_type_node
;
18825 dreg_types
[3] = V2SF_type_node
;
18826 dreg_types
[4] = neon_intDI_type_node
;
18828 qreg_types
[0] = V16QI_type_node
;
18829 qreg_types
[1] = V8HI_type_node
;
18830 qreg_types
[2] = V4SI_type_node
;
18831 qreg_types
[3] = V4SF_type_node
;
18832 qreg_types
[4] = V2DI_type_node
;
18834 for (i
= 0; i
< 5; i
++)
18837 for (j
= 0; j
< 5; j
++)
18839 reinterp_ftype_dreg
[i
][j
]
18840 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
18841 reinterp_ftype_qreg
[i
][j
]
18842 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
18846 for (i
= 0, fcode
= ARM_BUILTIN_NEON_BASE
;
18847 i
< ARRAY_SIZE (neon_builtin_data
);
18850 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
18852 const char* const modenames
[] = {
18853 "v8qi", "v4hi", "v2si", "v2sf", "di",
18854 "v16qi", "v8hi", "v4si", "v4sf", "v2di",
18859 int is_load
= 0, is_store
= 0;
18861 gcc_assert (ARRAY_SIZE (modenames
) == T_MAX
);
18868 case NEON_LOAD1LANE
:
18869 case NEON_LOADSTRUCT
:
18870 case NEON_LOADSTRUCTLANE
:
18872 /* Fall through. */
18874 case NEON_STORE1LANE
:
18875 case NEON_STORESTRUCT
:
18876 case NEON_STORESTRUCTLANE
:
18879 /* Fall through. */
18882 case NEON_LOGICBINOP
:
18883 case NEON_SHIFTINSERT
:
18890 case NEON_SHIFTIMM
:
18891 case NEON_SHIFTACC
:
18897 case NEON_LANEMULL
:
18898 case NEON_LANEMULH
:
18900 case NEON_SCALARMUL
:
18901 case NEON_SCALARMULL
:
18902 case NEON_SCALARMULH
:
18903 case NEON_SCALARMAC
:
18909 tree return_type
= void_type_node
, args
= void_list_node
;
18911 /* Build a function type directly from the insn_data for
18912 this builtin. The build_function_type() function takes
18913 care of removing duplicates for us. */
18914 for (k
= insn_data
[d
->code
].n_generator_args
- 1; k
>= 0; k
--)
18918 if (is_load
&& k
== 1)
18920 /* Neon load patterns always have the memory
18921 operand in the operand 1 position. */
18922 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
18923 == neon_struct_operand
);
18929 eltype
= const_intQI_pointer_node
;
18934 eltype
= const_intHI_pointer_node
;
18939 eltype
= const_intSI_pointer_node
;
18944 eltype
= const_float_pointer_node
;
18949 eltype
= const_intDI_pointer_node
;
18952 default: gcc_unreachable ();
18955 else if (is_store
&& k
== 0)
18957 /* Similarly, Neon store patterns use operand 0 as
18958 the memory location to store to. */
18959 gcc_assert (insn_data
[d
->code
].operand
[k
].predicate
18960 == neon_struct_operand
);
18966 eltype
= intQI_pointer_node
;
18971 eltype
= intHI_pointer_node
;
18976 eltype
= intSI_pointer_node
;
18981 eltype
= float_pointer_node
;
18986 eltype
= intDI_pointer_node
;
18989 default: gcc_unreachable ();
18994 switch (insn_data
[d
->code
].operand
[k
].mode
)
18996 case VOIDmode
: eltype
= void_type_node
; break;
18998 case QImode
: eltype
= neon_intQI_type_node
; break;
18999 case HImode
: eltype
= neon_intHI_type_node
; break;
19000 case SImode
: eltype
= neon_intSI_type_node
; break;
19001 case SFmode
: eltype
= neon_float_type_node
; break;
19002 case DImode
: eltype
= neon_intDI_type_node
; break;
19003 case TImode
: eltype
= intTI_type_node
; break;
19004 case EImode
: eltype
= intEI_type_node
; break;
19005 case OImode
: eltype
= intOI_type_node
; break;
19006 case CImode
: eltype
= intCI_type_node
; break;
19007 case XImode
: eltype
= intXI_type_node
; break;
19008 /* 64-bit vectors. */
19009 case V8QImode
: eltype
= V8QI_type_node
; break;
19010 case V4HImode
: eltype
= V4HI_type_node
; break;
19011 case V2SImode
: eltype
= V2SI_type_node
; break;
19012 case V2SFmode
: eltype
= V2SF_type_node
; break;
19013 /* 128-bit vectors. */
19014 case V16QImode
: eltype
= V16QI_type_node
; break;
19015 case V8HImode
: eltype
= V8HI_type_node
; break;
19016 case V4SImode
: eltype
= V4SI_type_node
; break;
19017 case V4SFmode
: eltype
= V4SF_type_node
; break;
19018 case V2DImode
: eltype
= V2DI_type_node
; break;
19019 default: gcc_unreachable ();
19023 if (k
== 0 && !is_store
)
19024 return_type
= eltype
;
19026 args
= tree_cons (NULL_TREE
, eltype
, args
);
19029 ftype
= build_function_type (return_type
, args
);
19033 case NEON_RESULTPAIR
:
19035 switch (insn_data
[d
->code
].operand
[1].mode
)
19037 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
19038 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
19039 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
19040 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
19041 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
19042 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
19043 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
19044 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
19045 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
19046 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
19047 default: gcc_unreachable ();
19052 case NEON_REINTERP
:
19054 /* We iterate over 5 doubleword types, then 5 quadword
19056 int rhs
= d
->mode
% 5;
19057 switch (insn_data
[d
->code
].operand
[0].mode
)
19059 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
19060 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
19061 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
19062 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
19063 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
19064 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
19065 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
19066 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
19067 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
19068 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
19069 default: gcc_unreachable ();
19075 gcc_unreachable ();
19078 gcc_assert (ftype
!= NULL
);
19080 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[d
->mode
]);
19082 decl
= add_builtin_function (namebuf
, ftype
, fcode
, BUILT_IN_MD
, NULL
,
19084 arm_builtin_decls
[fcode
] = decl
;
/* Register one machine-specific builtin, but only when the feature
   bit(s) in MASK are present in insn_flags for the current target.
   The resulting decl is recorded in arm_builtin_decls[CODE] so it can
   be returned later by the builtin-decl hook.
   NOTE(review): the extraction appears to have dropped the
   do { ... } while (0) scaffolding lines of this macro — confirm
   against the original source.  */
19088 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
19091 if ((MASK) & insn_flags) \
19094 bdecl = add_builtin_function ((NAME), (TYPE), (CODE), \
19095 BUILT_IN_MD, NULL, NULL_TREE); \
19096 arm_builtin_decls[CODE] = bdecl; \
/* Describes one simple (one- or two-operand) machine builtin: which
   insn pattern implements it, its user-visible name, and its builtin
   enum code.  Used by the bdesc_1arg/bdesc_2arg tables below.  */
19101 struct builtin_description
/* Feature-flag mask (FL_*) that must be set in insn_flags for this
   builtin to be available.  */
19103 const unsigned int mask
;
/* Insn pattern (CODE_FOR_*) that implements the builtin.  */
19104 const enum insn_code icode
;
/* User-visible builtin name; NULL for entries expanded specially.  */
19105 const char * const name
;
/* The builtin's code in enum arm_builtins.  */
19106 const enum arm_builtins code
;
/* Comparison code, when the builtin implements a comparison;
   UNKNOWN otherwise.  */
19107 const enum rtx_code comparison
;
/* Extra per-builtin flag bits.  */
19108 const unsigned int flag
;
19111 static const struct builtin_description bdesc_2arg
[] =
/* Expand to one builtin_description entry for a named iWMMXt builtin:
   gated on FL_IWMMXT, implemented by insn pattern CODE_FOR_##code,
   exposed to users as "__builtin_arm_" string, with enum code
   ARM_BUILTIN_##builtin.  Comparison is UNKNOWN and flag is 0.  */
19113 #define IWMMXT_BUILTIN(code, string, builtin) \
19114 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
19115 ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19117 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
19118 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
19119 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
19120 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
19121 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
19122 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
19123 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
19124 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
19125 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
19126 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
19127 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
19128 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
19129 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
19130 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
19131 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
19132 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
19133 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
19134 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
19135 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
19136 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
19137 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
19138 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
19139 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
19140 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
19141 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
19142 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
19143 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
19144 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
19145 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
19146 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
19147 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
19148 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
19149 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
19150 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
19151 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
19152 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
19153 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
19154 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
19155 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
19156 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
19157 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
19158 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
19159 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
19160 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
19161 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
19162 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
19163 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
19164 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
19165 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
19166 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
19167 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
19168 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
19169 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
19170 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
19171 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
19172 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
19173 IWMMXT_BUILTIN (iwmmxt_wmadds
, "wmadds", WMADDS
)
19174 IWMMXT_BUILTIN (iwmmxt_wmaddu
, "wmaddu", WMADDU
)
/* Like IWMMXT_BUILTIN, but for builtins with no user-visible name
   (name is NULL); these are expanded specially rather than looked up
   by name.  */
19176 #define IWMMXT_BUILTIN2(code, builtin) \
19177 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },
19179 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
19180 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
19181 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
19182 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
19183 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
19184 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
19185 IWMMXT_BUILTIN2 (ashlv4hi3_di
, WSLLH
)
19186 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt
, WSLLHI
)
19187 IWMMXT_BUILTIN2 (ashlv2si3_di
, WSLLW
)
19188 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt
, WSLLWI
)
19189 IWMMXT_BUILTIN2 (ashldi3_di
, WSLLD
)
19190 IWMMXT_BUILTIN2 (ashldi3_iwmmxt
, WSLLDI
)
19191 IWMMXT_BUILTIN2 (lshrv4hi3_di
, WSRLH
)
19192 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt
, WSRLHI
)
19193 IWMMXT_BUILTIN2 (lshrv2si3_di
, WSRLW
)
19194 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt
, WSRLWI
)
19195 IWMMXT_BUILTIN2 (lshrdi3_di
, WSRLD
)
19196 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt
, WSRLDI
)
19197 IWMMXT_BUILTIN2 (ashrv4hi3_di
, WSRAH
)
19198 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt
, WSRAHI
)
19199 IWMMXT_BUILTIN2 (ashrv2si3_di
, WSRAW
)
19200 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt
, WSRAWI
)
19201 IWMMXT_BUILTIN2 (ashrdi3_di
, WSRAD
)
19202 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt
, WSRADI
)
19203 IWMMXT_BUILTIN2 (rorv4hi3_di
, WRORH
)
19204 IWMMXT_BUILTIN2 (rorv4hi3
, WRORHI
)
19205 IWMMXT_BUILTIN2 (rorv2si3_di
, WRORW
)
19206 IWMMXT_BUILTIN2 (rorv2si3
, WRORWI
)
19207 IWMMXT_BUILTIN2 (rordi3_di
, WRORD
)
19208 IWMMXT_BUILTIN2 (rordi3
, WRORDI
)
19209 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
19210 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
19213 static const struct builtin_description bdesc_1arg
[] =
19215 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
19216 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
19217 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
19218 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
19219 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
19220 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
19221 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
19222 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
19223 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
19224 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
19225 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
19226 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
19227 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
19228 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
19229 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
19230 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
19231 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
19232 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
19235 /* Set up all the iWMMXt builtins. This is not called if
19236 TARGET_IWMMXT is zero. */
19239 arm_init_iwmmxt_builtins (void)
19241 const struct builtin_description
* d
;
19244 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
19245 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
19246 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
19249 = build_function_type_list (integer_type_node
,
19250 integer_type_node
, NULL_TREE
);
19251 tree v8qi_ftype_v8qi_v8qi_int
19252 = build_function_type_list (V8QI_type_node
,
19253 V8QI_type_node
, V8QI_type_node
,
19254 integer_type_node
, NULL_TREE
);
19255 tree v4hi_ftype_v4hi_int
19256 = build_function_type_list (V4HI_type_node
,
19257 V4HI_type_node
, integer_type_node
, NULL_TREE
);
19258 tree v2si_ftype_v2si_int
19259 = build_function_type_list (V2SI_type_node
,
19260 V2SI_type_node
, integer_type_node
, NULL_TREE
);
19261 tree v2si_ftype_di_di
19262 = build_function_type_list (V2SI_type_node
,
19263 long_long_integer_type_node
,
19264 long_long_integer_type_node
,
19266 tree di_ftype_di_int
19267 = build_function_type_list (long_long_integer_type_node
,
19268 long_long_integer_type_node
,
19269 integer_type_node
, NULL_TREE
);
19270 tree di_ftype_di_int_int
19271 = build_function_type_list (long_long_integer_type_node
,
19272 long_long_integer_type_node
,
19274 integer_type_node
, NULL_TREE
);
19275 tree int_ftype_v8qi
19276 = build_function_type_list (integer_type_node
,
19277 V8QI_type_node
, NULL_TREE
);
19278 tree int_ftype_v4hi
19279 = build_function_type_list (integer_type_node
,
19280 V4HI_type_node
, NULL_TREE
);
19281 tree int_ftype_v2si
19282 = build_function_type_list (integer_type_node
,
19283 V2SI_type_node
, NULL_TREE
);
19284 tree int_ftype_v8qi_int
19285 = build_function_type_list (integer_type_node
,
19286 V8QI_type_node
, integer_type_node
, NULL_TREE
);
19287 tree int_ftype_v4hi_int
19288 = build_function_type_list (integer_type_node
,
19289 V4HI_type_node
, integer_type_node
, NULL_TREE
);
19290 tree int_ftype_v2si_int
19291 = build_function_type_list (integer_type_node
,
19292 V2SI_type_node
, integer_type_node
, NULL_TREE
);
19293 tree v8qi_ftype_v8qi_int_int
19294 = build_function_type_list (V8QI_type_node
,
19295 V8QI_type_node
, integer_type_node
,
19296 integer_type_node
, NULL_TREE
);
19297 tree v4hi_ftype_v4hi_int_int
19298 = build_function_type_list (V4HI_type_node
,
19299 V4HI_type_node
, integer_type_node
,
19300 integer_type_node
, NULL_TREE
);
19301 tree v2si_ftype_v2si_int_int
19302 = build_function_type_list (V2SI_type_node
,
19303 V2SI_type_node
, integer_type_node
,
19304 integer_type_node
, NULL_TREE
);
19305 /* Miscellaneous. */
19306 tree v8qi_ftype_v4hi_v4hi
19307 = build_function_type_list (V8QI_type_node
,
19308 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
19309 tree v4hi_ftype_v2si_v2si
19310 = build_function_type_list (V4HI_type_node
,
19311 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
19312 tree v2si_ftype_v4hi_v4hi
19313 = build_function_type_list (V2SI_type_node
,
19314 V4HI_type_node
, V4HI_type_node
, NULL_TREE
);
19315 tree v2si_ftype_v8qi_v8qi
19316 = build_function_type_list (V2SI_type_node
,
19317 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
19318 tree v4hi_ftype_v4hi_di
19319 = build_function_type_list (V4HI_type_node
,
19320 V4HI_type_node
, long_long_integer_type_node
,
19322 tree v2si_ftype_v2si_di
19323 = build_function_type_list (V2SI_type_node
,
19324 V2SI_type_node
, long_long_integer_type_node
,
19326 tree void_ftype_int_int
19327 = build_function_type_list (void_type_node
,
19328 integer_type_node
, integer_type_node
,
19331 = build_function_type_list (long_long_unsigned_type_node
, NULL_TREE
);
19333 = build_function_type_list (long_long_integer_type_node
,
19334 V8QI_type_node
, NULL_TREE
);
19336 = build_function_type_list (long_long_integer_type_node
,
19337 V4HI_type_node
, NULL_TREE
);
19339 = build_function_type_list (long_long_integer_type_node
,
19340 V2SI_type_node
, NULL_TREE
);
19341 tree v2si_ftype_v4hi
19342 = build_function_type_list (V2SI_type_node
,
19343 V4HI_type_node
, NULL_TREE
);
19344 tree v4hi_ftype_v8qi
19345 = build_function_type_list (V4HI_type_node
,
19346 V8QI_type_node
, NULL_TREE
);
19348 tree di_ftype_di_v4hi_v4hi
19349 = build_function_type_list (long_long_unsigned_type_node
,
19350 long_long_unsigned_type_node
,
19351 V4HI_type_node
, V4HI_type_node
,
19354 tree di_ftype_v4hi_v4hi
19355 = build_function_type_list (long_long_unsigned_type_node
,
19356 V4HI_type_node
,V4HI_type_node
,
19359 /* Normal vector binops. */
19360 tree v8qi_ftype_v8qi_v8qi
19361 = build_function_type_list (V8QI_type_node
,
19362 V8QI_type_node
, V8QI_type_node
, NULL_TREE
);
19363 tree v4hi_ftype_v4hi_v4hi
19364 = build_function_type_list (V4HI_type_node
,
19365 V4HI_type_node
,V4HI_type_node
, NULL_TREE
);
19366 tree v2si_ftype_v2si_v2si
19367 = build_function_type_list (V2SI_type_node
,
19368 V2SI_type_node
, V2SI_type_node
, NULL_TREE
);
19369 tree di_ftype_di_di
19370 = build_function_type_list (long_long_unsigned_type_node
,
19371 long_long_unsigned_type_node
,
19372 long_long_unsigned_type_node
,
19375 /* Add all builtins that are more or less simple operations on two
19377 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19379 /* Use one of the operands; the target can have a different mode for
19380 mask-generating compares. */
19381 enum machine_mode mode
;
19387 mode
= insn_data
[d
->icode
].operand
[1].mode
;
19392 type
= v8qi_ftype_v8qi_v8qi
;
19395 type
= v4hi_ftype_v4hi_v4hi
;
19398 type
= v2si_ftype_v2si_v2si
;
19401 type
= di_ftype_di_di
;
19405 gcc_unreachable ();
19408 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
19411 /* Add the remaining MMX insns with somewhat more complicated types. */
19412 #define iwmmx_mbuiltin(NAME, TYPE, CODE) \
19413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_" NAME, (TYPE), \
19414 ARM_BUILTIN_ ## CODE)
19416 iwmmx_mbuiltin ("wzero", di_ftype_void
, WZERO
);
19417 iwmmx_mbuiltin ("setwcx", void_ftype_int_int
, SETWCX
);
19418 iwmmx_mbuiltin ("getwcx", int_ftype_int
, GETWCX
);
19420 iwmmx_mbuiltin ("wsllh", v4hi_ftype_v4hi_di
, WSLLH
);
19421 iwmmx_mbuiltin ("wsllw", v2si_ftype_v2si_di
, WSLLW
);
19422 iwmmx_mbuiltin ("wslld", di_ftype_di_di
, WSLLD
);
19423 iwmmx_mbuiltin ("wsllhi", v4hi_ftype_v4hi_int
, WSLLHI
);
19424 iwmmx_mbuiltin ("wsllwi", v2si_ftype_v2si_int
, WSLLWI
);
19425 iwmmx_mbuiltin ("wslldi", di_ftype_di_int
, WSLLDI
);
19427 iwmmx_mbuiltin ("wsrlh", v4hi_ftype_v4hi_di
, WSRLH
);
19428 iwmmx_mbuiltin ("wsrlw", v2si_ftype_v2si_di
, WSRLW
);
19429 iwmmx_mbuiltin ("wsrld", di_ftype_di_di
, WSRLD
);
19430 iwmmx_mbuiltin ("wsrlhi", v4hi_ftype_v4hi_int
, WSRLHI
);
19431 iwmmx_mbuiltin ("wsrlwi", v2si_ftype_v2si_int
, WSRLWI
);
19432 iwmmx_mbuiltin ("wsrldi", di_ftype_di_int
, WSRLDI
);
19434 iwmmx_mbuiltin ("wsrah", v4hi_ftype_v4hi_di
, WSRAH
);
19435 iwmmx_mbuiltin ("wsraw", v2si_ftype_v2si_di
, WSRAW
);
19436 iwmmx_mbuiltin ("wsrad", di_ftype_di_di
, WSRAD
);
19437 iwmmx_mbuiltin ("wsrahi", v4hi_ftype_v4hi_int
, WSRAHI
);
19438 iwmmx_mbuiltin ("wsrawi", v2si_ftype_v2si_int
, WSRAWI
);
19439 iwmmx_mbuiltin ("wsradi", di_ftype_di_int
, WSRADI
);
19441 iwmmx_mbuiltin ("wrorh", v4hi_ftype_v4hi_di
, WRORH
);
19442 iwmmx_mbuiltin ("wrorw", v2si_ftype_v2si_di
, WRORW
);
19443 iwmmx_mbuiltin ("wrord", di_ftype_di_di
, WRORD
);
19444 iwmmx_mbuiltin ("wrorhi", v4hi_ftype_v4hi_int
, WRORHI
);
19445 iwmmx_mbuiltin ("wrorwi", v2si_ftype_v2si_int
, WRORWI
);
19446 iwmmx_mbuiltin ("wrordi", di_ftype_di_int
, WRORDI
);
19448 iwmmx_mbuiltin ("wshufh", v4hi_ftype_v4hi_int
, WSHUFH
);
19450 iwmmx_mbuiltin ("wsadb", v2si_ftype_v8qi_v8qi
, WSADB
);
19451 iwmmx_mbuiltin ("wsadh", v2si_ftype_v4hi_v4hi
, WSADH
);
19452 iwmmx_mbuiltin ("wsadbz", v2si_ftype_v8qi_v8qi
, WSADBZ
);
19453 iwmmx_mbuiltin ("wsadhz", v2si_ftype_v4hi_v4hi
, WSADHZ
);
19455 iwmmx_mbuiltin ("textrmsb", int_ftype_v8qi_int
, TEXTRMSB
);
19456 iwmmx_mbuiltin ("textrmsh", int_ftype_v4hi_int
, TEXTRMSH
);
19457 iwmmx_mbuiltin ("textrmsw", int_ftype_v2si_int
, TEXTRMSW
);
19458 iwmmx_mbuiltin ("textrmub", int_ftype_v8qi_int
, TEXTRMUB
);
19459 iwmmx_mbuiltin ("textrmuh", int_ftype_v4hi_int
, TEXTRMUH
);
19460 iwmmx_mbuiltin ("textrmuw", int_ftype_v2si_int
, TEXTRMUW
);
19461 iwmmx_mbuiltin ("tinsrb", v8qi_ftype_v8qi_int_int
, TINSRB
);
19462 iwmmx_mbuiltin ("tinsrh", v4hi_ftype_v4hi_int_int
, TINSRH
);
19463 iwmmx_mbuiltin ("tinsrw", v2si_ftype_v2si_int_int
, TINSRW
);
19465 iwmmx_mbuiltin ("waccb", di_ftype_v8qi
, WACCB
);
19466 iwmmx_mbuiltin ("wacch", di_ftype_v4hi
, WACCH
);
19467 iwmmx_mbuiltin ("waccw", di_ftype_v2si
, WACCW
);
19469 iwmmx_mbuiltin ("tmovmskb", int_ftype_v8qi
, TMOVMSKB
);
19470 iwmmx_mbuiltin ("tmovmskh", int_ftype_v4hi
, TMOVMSKH
);
19471 iwmmx_mbuiltin ("tmovmskw", int_ftype_v2si
, TMOVMSKW
);
19473 iwmmx_mbuiltin ("wpackhss", v8qi_ftype_v4hi_v4hi
, WPACKHSS
);
19474 iwmmx_mbuiltin ("wpackhus", v8qi_ftype_v4hi_v4hi
, WPACKHUS
);
19475 iwmmx_mbuiltin ("wpackwus", v4hi_ftype_v2si_v2si
, WPACKWUS
);
19476 iwmmx_mbuiltin ("wpackwss", v4hi_ftype_v2si_v2si
, WPACKWSS
);
19477 iwmmx_mbuiltin ("wpackdus", v2si_ftype_di_di
, WPACKDUS
);
19478 iwmmx_mbuiltin ("wpackdss", v2si_ftype_di_di
, WPACKDSS
);
19480 iwmmx_mbuiltin ("wunpckehub", v4hi_ftype_v8qi
, WUNPCKEHUB
);
19481 iwmmx_mbuiltin ("wunpckehuh", v2si_ftype_v4hi
, WUNPCKEHUH
);
19482 iwmmx_mbuiltin ("wunpckehuw", di_ftype_v2si
, WUNPCKEHUW
);
19483 iwmmx_mbuiltin ("wunpckehsb", v4hi_ftype_v8qi
, WUNPCKEHSB
);
19484 iwmmx_mbuiltin ("wunpckehsh", v2si_ftype_v4hi
, WUNPCKEHSH
);
19485 iwmmx_mbuiltin ("wunpckehsw", di_ftype_v2si
, WUNPCKEHSW
);
19486 iwmmx_mbuiltin ("wunpckelub", v4hi_ftype_v8qi
, WUNPCKELUB
);
19487 iwmmx_mbuiltin ("wunpckeluh", v2si_ftype_v4hi
, WUNPCKELUH
);
19488 iwmmx_mbuiltin ("wunpckeluw", di_ftype_v2si
, WUNPCKELUW
);
19489 iwmmx_mbuiltin ("wunpckelsb", v4hi_ftype_v8qi
, WUNPCKELSB
);
19490 iwmmx_mbuiltin ("wunpckelsh", v2si_ftype_v4hi
, WUNPCKELSH
);
19491 iwmmx_mbuiltin ("wunpckelsw", di_ftype_v2si
, WUNPCKELSW
);
19493 iwmmx_mbuiltin ("wmacs", di_ftype_di_v4hi_v4hi
, WMACS
);
19494 iwmmx_mbuiltin ("wmacsz", di_ftype_v4hi_v4hi
, WMACSZ
);
19495 iwmmx_mbuiltin ("wmacu", di_ftype_di_v4hi_v4hi
, WMACU
);
19496 iwmmx_mbuiltin ("wmacuz", di_ftype_v4hi_v4hi
, WMACUZ
);
19498 iwmmx_mbuiltin ("walign", v8qi_ftype_v8qi_v8qi_int
, WALIGN
);
19499 iwmmx_mbuiltin ("tmia", di_ftype_di_int_int
, TMIA
);
19500 iwmmx_mbuiltin ("tmiaph", di_ftype_di_int_int
, TMIAPH
);
19501 iwmmx_mbuiltin ("tmiabb", di_ftype_di_int_int
, TMIABB
);
19502 iwmmx_mbuiltin ("tmiabt", di_ftype_di_int_int
, TMIABT
);
19503 iwmmx_mbuiltin ("tmiatb", di_ftype_di_int_int
, TMIATB
);
19504 iwmmx_mbuiltin ("tmiatt", di_ftype_di_int_int
, TMIATT
);
19506 #undef iwmmx_mbuiltin
19510 arm_init_tls_builtins (void)
19514 ftype
= build_function_type (ptr_type_node
, void_list_node
);
19515 decl
= add_builtin_function ("__builtin_thread_pointer", ftype
,
19516 ARM_BUILTIN_THREAD_POINTER
, BUILT_IN_MD
,
19518 TREE_NOTHROW (decl
) = 1;
19519 TREE_READONLY (decl
) = 1;
19520 arm_builtin_decls
[ARM_BUILTIN_THREAD_POINTER
] = decl
;
19524 arm_init_fp16_builtins (void)
19526 tree fp16_type
= make_node (REAL_TYPE
);
19527 TYPE_PRECISION (fp16_type
) = 16;
19528 layout_type (fp16_type
);
19529 (*lang_hooks
.types
.register_builtin_type
) (fp16_type
, "__fp16");
19533 arm_init_builtins (void)
19535 arm_init_tls_builtins ();
19537 if (TARGET_REALLY_IWMMXT
)
19538 arm_init_iwmmxt_builtins ();
19541 arm_init_neon_builtins ();
19543 if (arm_fp16_format
)
19544 arm_init_fp16_builtins ();
19547 /* Return the ARM builtin for CODE. */
19550 arm_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
19552 if (code
>= ARM_BUILTIN_MAX
)
19553 return error_mark_node
;
19555 return arm_builtin_decls
[code
];
19558 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19560 static const char *
19561 arm_invalid_parameter_type (const_tree t
)
19563 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19564 return N_("function parameters cannot have __fp16 type");
19568 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
19570 static const char *
19571 arm_invalid_return_type (const_tree t
)
19573 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19574 return N_("functions cannot return __fp16 type");
19578 /* Implement TARGET_PROMOTED_TYPE. */
19581 arm_promoted_type (const_tree t
)
19583 if (SCALAR_FLOAT_TYPE_P (t
) && TYPE_PRECISION (t
) == 16)
19584 return float_type_node
;
19588 /* Implement TARGET_CONVERT_TO_TYPE.
19589 Specifically, this hook implements the peculiarity of the ARM
19590 half-precision floating-point C semantics that requires conversions between
19591 __fp16 to or from double to do an intermediate conversion to float. */
19594 arm_convert_to_type (tree type
, tree expr
)
19596 tree fromtype
= TREE_TYPE (expr
);
19597 if (!SCALAR_FLOAT_TYPE_P (fromtype
) || !SCALAR_FLOAT_TYPE_P (type
))
19599 if ((TYPE_PRECISION (fromtype
) == 16 && TYPE_PRECISION (type
) > 32)
19600 || (TYPE_PRECISION (type
) == 16 && TYPE_PRECISION (fromtype
) > 32))
19601 return convert (type
, convert (float_type_node
, expr
));
19605 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19606 This simply adds HFmode as a supported mode; even though we don't
19607 implement arithmetic on this type directly, it's supported by
19608 optabs conversions, much the way the double-word arithmetic is
19609 special-cased in the default hook. */
19612 arm_scalar_mode_supported_p (enum machine_mode mode
)
19614 if (mode
== HFmode
)
19615 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
19616 else if (ALL_FIXED_POINT_MODE_P (mode
))
19619 return default_scalar_mode_supported_p (mode
);
19622 /* Errors in the source file can cause expand_expr to return const0_rtx
19623 where we expect a vector. To avoid crashing, use one of the vector
19624 clear instructions. */
19627 safe_vector_operand (rtx x
, enum machine_mode mode
)
19629 if (x
!= const0_rtx
)
19631 x
= gen_reg_rtx (mode
);
19633 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
19634 : gen_rtx_SUBREG (DImode
, x
, 0)));
19638 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19641 arm_expand_binop_builtin (enum insn_code icode
,
19642 tree exp
, rtx target
)
19645 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19646 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19647 rtx op0
= expand_normal (arg0
);
19648 rtx op1
= expand_normal (arg1
);
19649 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19650 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19651 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19653 if (VECTOR_MODE_P (mode0
))
19654 op0
= safe_vector_operand (op0
, mode0
);
19655 if (VECTOR_MODE_P (mode1
))
19656 op1
= safe_vector_operand (op1
, mode1
);
19659 || GET_MODE (target
) != tmode
19660 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19661 target
= gen_reg_rtx (tmode
);
19663 gcc_assert (GET_MODE (op0
) == mode0
&& GET_MODE (op1
) == mode1
);
19665 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19666 op0
= copy_to_mode_reg (mode0
, op0
);
19667 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19668 op1
= copy_to_mode_reg (mode1
, op1
);
19670 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19677 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19680 arm_expand_unop_builtin (enum insn_code icode
,
19681 tree exp
, rtx target
, int do_load
)
19684 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19685 rtx op0
= expand_normal (arg0
);
19686 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19687 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19690 || GET_MODE (target
) != tmode
19691 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19692 target
= gen_reg_rtx (tmode
);
19694 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19697 if (VECTOR_MODE_P (mode0
))
19698 op0
= safe_vector_operand (op0
, mode0
);
19700 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19701 op0
= copy_to_mode_reg (mode0
, op0
);
19704 pat
= GEN_FCN (icode
) (target
, op0
);
/* Argument-handling kinds for arm_expand_neon_args: copy the operand
   into a register, require an immediate constant, treat it as memory,
   or terminate the vararg list.  NOTE(review): the enum shell was
   dropped by extraction; member order reconstructed from the switch in
   arm_expand_neon_args — verify against upstream.  */
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_MEMORY,
  NEON_ARG_STOP
} builtin_arg;

/* Maximum number of operands a Neon builtin pattern can take.  */
#define NEON_MAX_BUILTIN_ARGS 5
19720 /* EXP is a pointer argument to a Neon load or store intrinsic. Derive
19721 and return an expression for the accessed memory.
19723 The intrinsic function operates on a block of registers that has
19724 mode REG_MODE. This block contains vectors of type TYPE_MODE.
19725 The function references the memory at EXP in mode MEM_MODE;
19726 this mode may be BLKmode if no more suitable mode is available. */
19729 neon_dereference_pointer (tree exp
, enum machine_mode mem_mode
,
19730 enum machine_mode reg_mode
,
19731 neon_builtin_type_mode type_mode
)
19733 HOST_WIDE_INT reg_size
, vector_size
, nvectors
, nelems
;
19734 tree elem_type
, upper_bound
, array_type
;
19736 /* Work out the size of the register block in bytes. */
19737 reg_size
= GET_MODE_SIZE (reg_mode
);
19739 /* Work out the size of each vector in bytes. */
19740 gcc_assert (TYPE_MODE_BIT (type_mode
) & (TB_DREG
| TB_QREG
));
19741 vector_size
= (TYPE_MODE_BIT (type_mode
) & TB_QREG
? 16 : 8);
19743 /* Work out how many vectors there are. */
19744 gcc_assert (reg_size
% vector_size
== 0);
19745 nvectors
= reg_size
/ vector_size
;
19747 /* Work out how many elements are being loaded or stored.
19748 MEM_MODE == REG_MODE implies a one-to-one mapping between register
19749 and memory elements; anything else implies a lane load or store. */
19750 if (mem_mode
== reg_mode
)
19751 nelems
= vector_size
* nvectors
;
19755 /* Work out the type of each element. */
19756 gcc_assert (POINTER_TYPE_P (TREE_TYPE (exp
)));
19757 elem_type
= TREE_TYPE (TREE_TYPE (exp
));
19759 /* Create a type that describes the full access. */
19760 upper_bound
= build_int_cst (size_type_node
, nelems
- 1);
19761 array_type
= build_array_type (elem_type
, build_index_type (upper_bound
));
19763 /* Dereference EXP using that type. */
19764 exp
= convert (build_pointer_type (array_type
), exp
);
19765 return fold_build2 (MEM_REF
, array_type
, exp
,
19766 build_int_cst (TREE_TYPE (exp
), 0));
19769 /* Expand a Neon builtin. */
19771 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
19772 neon_builtin_type_mode type_mode
,
19777 tree arg
[NEON_MAX_BUILTIN_ARGS
];
19778 rtx op
[NEON_MAX_BUILTIN_ARGS
];
19779 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19780 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
19781 enum machine_mode other_mode
;
19787 || GET_MODE (target
) != tmode
19788 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
19789 target
= gen_reg_rtx (tmode
);
19791 va_start (ap
, exp
);
19795 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
19797 if (thisarg
== NEON_ARG_STOP
)
19801 opno
= argc
+ have_retval
;
19802 mode
[argc
] = insn_data
[icode
].operand
[opno
].mode
;
19803 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
19804 if (thisarg
== NEON_ARG_MEMORY
)
19806 other_mode
= insn_data
[icode
].operand
[1 - opno
].mode
;
19807 arg
[argc
] = neon_dereference_pointer (arg
[argc
], mode
[argc
],
19808 other_mode
, type_mode
);
19810 op
[argc
] = expand_normal (arg
[argc
]);
19814 case NEON_ARG_COPY_TO_REG
:
19815 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19816 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19817 (op
[argc
], mode
[argc
]))
19818 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
19821 case NEON_ARG_CONSTANT
:
19822 /* FIXME: This error message is somewhat unhelpful. */
19823 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19824 (op
[argc
], mode
[argc
]))
19825 error ("argument must be a constant");
19828 case NEON_ARG_MEMORY
:
19829 gcc_assert (MEM_P (op
[argc
]));
19830 PUT_MODE (op
[argc
], mode
[argc
]);
19831 /* ??? arm_neon.h uses the same built-in functions for signed
19832 and unsigned accesses, casting where necessary. This isn't
19834 set_mem_alias_set (op
[argc
], 0);
19835 if (!(*insn_data
[icode
].operand
[opno
].predicate
)
19836 (op
[argc
], mode
[argc
]))
19837 op
[argc
] = (replace_equiv_address
19838 (op
[argc
], force_reg (Pmode
, XEXP (op
[argc
], 0))));
19841 case NEON_ARG_STOP
:
19842 gcc_unreachable ();
19855 pat
= GEN_FCN (icode
) (target
, op
[0]);
19859 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
19863 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
19867 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
19871 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
19875 gcc_unreachable ();
19881 pat
= GEN_FCN (icode
) (op
[0]);
19885 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
19889 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
19893 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
19897 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
19901 gcc_unreachable ();
19912 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19913 constants defined per-instruction or per instruction-variant. Instead, the
19914 required info is looked up in the table neon_builtin_data. */
19916 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
19918 neon_builtin_datum
*d
= &neon_builtin_data
[fcode
- ARM_BUILTIN_NEON_BASE
];
19919 neon_itype itype
= d
->itype
;
19920 enum insn_code icode
= d
->code
;
19921 neon_builtin_type_mode type_mode
= d
->mode
;
19928 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19929 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19933 case NEON_SCALARMUL
:
19934 case NEON_SCALARMULL
:
19935 case NEON_SCALARMULH
:
19936 case NEON_SHIFTINSERT
:
19937 case NEON_LOGICBINOP
:
19938 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19939 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19943 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19944 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19945 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19949 case NEON_SHIFTIMM
:
19950 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19951 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
19955 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19956 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19960 case NEON_REINTERP
:
19961 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19962 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19966 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19967 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19969 case NEON_RESULTPAIR
:
19970 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
19971 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19975 case NEON_LANEMULL
:
19976 case NEON_LANEMULH
:
19977 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19978 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19979 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19982 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19983 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19984 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19986 case NEON_SHIFTACC
:
19987 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19988 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19989 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19991 case NEON_SCALARMAC
:
19992 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19993 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19994 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19998 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
19999 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
20003 case NEON_LOADSTRUCT
:
20004 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
20005 NEON_ARG_MEMORY
, NEON_ARG_STOP
);
20007 case NEON_LOAD1LANE
:
20008 case NEON_LOADSTRUCTLANE
:
20009 return arm_expand_neon_args (target
, icode
, 1, type_mode
, exp
,
20010 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20014 case NEON_STORESTRUCT
:
20015 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
20016 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
20018 case NEON_STORE1LANE
:
20019 case NEON_STORESTRUCTLANE
:
20020 return arm_expand_neon_args (target
, icode
, 0, type_mode
, exp
,
20021 NEON_ARG_MEMORY
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
20025 gcc_unreachable ();
20028 /* Emit code to reinterpret one Neon type as another, without altering bits. */
20030 neon_reinterpret (rtx dest
, rtx src
)
20032 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
20035 /* Emit code to place a Neon pair result in memory locations (with equal
20038 neon_emit_pair_result_insn (enum machine_mode mode
,
20039 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
20042 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
20043 rtx tmp1
= gen_reg_rtx (mode
);
20044 rtx tmp2
= gen_reg_rtx (mode
);
20046 emit_insn (intfn (tmp1
, op1
, op2
, tmp2
));
20048 emit_move_insn (mem
, tmp1
);
20049 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
20050 emit_move_insn (mem
, tmp2
);
20053 /* Set up operands for a register copy from src to dest, taking care not to
20054 clobber registers in the process.
20055 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
20056 be called with a large N, so that should be OK. */
20059 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
20061 unsigned int copied
= 0, opctr
= 0;
20062 unsigned int done
= (1 << count
) - 1;
20065 while (copied
!= done
)
20067 for (i
= 0; i
< count
; i
++)
20071 for (j
= 0; good
&& j
< count
; j
++)
20072 if (i
!= j
&& (copied
& (1 << j
)) == 0
20073 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
20078 operands
[opctr
++] = dest
[i
];
20079 operands
[opctr
++] = src
[i
];
20085 gcc_assert (opctr
== count
* 2);
20088 /* Expand an expression EXP that calls a built-in function,
20089 with result going to TARGET if that's convenient
20090 (and in mode MODE if that's convenient).
20091 SUBTARGET may be used as the target for computing one of EXP's operands.
20092 IGNORE is nonzero if the value is to be ignored. */
20095 arm_expand_builtin (tree exp
,
20097 rtx subtarget ATTRIBUTE_UNUSED
,
20098 enum machine_mode mode ATTRIBUTE_UNUSED
,
20099 int ignore ATTRIBUTE_UNUSED
)
20101 const struct builtin_description
* d
;
20102 enum insn_code icode
;
20103 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
20111 int fcode
= DECL_FUNCTION_CODE (fndecl
);
20113 enum machine_mode tmode
;
20114 enum machine_mode mode0
;
20115 enum machine_mode mode1
;
20116 enum machine_mode mode2
;
20118 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
20119 return arm_expand_neon_builtin (fcode
, exp
, target
);
20123 case ARM_BUILTIN_TEXTRMSB
:
20124 case ARM_BUILTIN_TEXTRMUB
:
20125 case ARM_BUILTIN_TEXTRMSH
:
20126 case ARM_BUILTIN_TEXTRMUH
:
20127 case ARM_BUILTIN_TEXTRMSW
:
20128 case ARM_BUILTIN_TEXTRMUW
:
20129 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
20130 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
20131 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
20132 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
20133 : CODE_FOR_iwmmxt_textrmw
);
20135 arg0
= CALL_EXPR_ARG (exp
, 0);
20136 arg1
= CALL_EXPR_ARG (exp
, 1);
20137 op0
= expand_normal (arg0
);
20138 op1
= expand_normal (arg1
);
20139 tmode
= insn_data
[icode
].operand
[0].mode
;
20140 mode0
= insn_data
[icode
].operand
[1].mode
;
20141 mode1
= insn_data
[icode
].operand
[2].mode
;
20143 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20144 op0
= copy_to_mode_reg (mode0
, op0
);
20145 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20147 /* @@@ better error message */
20148 error ("selector must be an immediate");
20149 return gen_reg_rtx (tmode
);
20152 || GET_MODE (target
) != tmode
20153 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20154 target
= gen_reg_rtx (tmode
);
20155 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20161 case ARM_BUILTIN_TINSRB
:
20162 case ARM_BUILTIN_TINSRH
:
20163 case ARM_BUILTIN_TINSRW
:
20164 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
20165 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
20166 : CODE_FOR_iwmmxt_tinsrw
);
20167 arg0
= CALL_EXPR_ARG (exp
, 0);
20168 arg1
= CALL_EXPR_ARG (exp
, 1);
20169 arg2
= CALL_EXPR_ARG (exp
, 2);
20170 op0
= expand_normal (arg0
);
20171 op1
= expand_normal (arg1
);
20172 op2
= expand_normal (arg2
);
20173 tmode
= insn_data
[icode
].operand
[0].mode
;
20174 mode0
= insn_data
[icode
].operand
[1].mode
;
20175 mode1
= insn_data
[icode
].operand
[2].mode
;
20176 mode2
= insn_data
[icode
].operand
[3].mode
;
20178 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20179 op0
= copy_to_mode_reg (mode0
, op0
);
20180 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20181 op1
= copy_to_mode_reg (mode1
, op1
);
20182 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
20184 /* @@@ better error message */
20185 error ("selector must be an immediate");
20189 || GET_MODE (target
) != tmode
20190 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20191 target
= gen_reg_rtx (tmode
);
20192 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
20198 case ARM_BUILTIN_SETWCX
:
20199 arg0
= CALL_EXPR_ARG (exp
, 0);
20200 arg1
= CALL_EXPR_ARG (exp
, 1);
20201 op0
= force_reg (SImode
, expand_normal (arg0
));
20202 op1
= expand_normal (arg1
);
20203 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
20206 case ARM_BUILTIN_GETWCX
:
20207 arg0
= CALL_EXPR_ARG (exp
, 0);
20208 op0
= expand_normal (arg0
);
20209 target
= gen_reg_rtx (SImode
);
20210 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
20213 case ARM_BUILTIN_WSHUFH
:
20214 icode
= CODE_FOR_iwmmxt_wshufh
;
20215 arg0
= CALL_EXPR_ARG (exp
, 0);
20216 arg1
= CALL_EXPR_ARG (exp
, 1);
20217 op0
= expand_normal (arg0
);
20218 op1
= expand_normal (arg1
);
20219 tmode
= insn_data
[icode
].operand
[0].mode
;
20220 mode1
= insn_data
[icode
].operand
[1].mode
;
20221 mode2
= insn_data
[icode
].operand
[2].mode
;
20223 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
20224 op0
= copy_to_mode_reg (mode1
, op0
);
20225 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
20227 /* @@@ better error message */
20228 error ("mask must be an immediate");
20232 || GET_MODE (target
) != tmode
20233 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20234 target
= gen_reg_rtx (tmode
);
20235 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
20241 case ARM_BUILTIN_WSADB
:
20242 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
20243 case ARM_BUILTIN_WSADH
:
20244 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
20245 case ARM_BUILTIN_WSADBZ
:
20246 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
20247 case ARM_BUILTIN_WSADHZ
:
20248 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
20250 /* Several three-argument builtins. */
20251 case ARM_BUILTIN_WMACS
:
20252 case ARM_BUILTIN_WMACU
:
20253 case ARM_BUILTIN_WALIGN
:
20254 case ARM_BUILTIN_TMIA
:
20255 case ARM_BUILTIN_TMIAPH
:
20256 case ARM_BUILTIN_TMIATT
:
20257 case ARM_BUILTIN_TMIATB
:
20258 case ARM_BUILTIN_TMIABT
:
20259 case ARM_BUILTIN_TMIABB
:
20260 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
20261 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
20262 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
20263 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
20264 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
20265 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
20266 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
20267 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
20268 : CODE_FOR_iwmmxt_walign
);
20269 arg0
= CALL_EXPR_ARG (exp
, 0);
20270 arg1
= CALL_EXPR_ARG (exp
, 1);
20271 arg2
= CALL_EXPR_ARG (exp
, 2);
20272 op0
= expand_normal (arg0
);
20273 op1
= expand_normal (arg1
);
20274 op2
= expand_normal (arg2
);
20275 tmode
= insn_data
[icode
].operand
[0].mode
;
20276 mode0
= insn_data
[icode
].operand
[1].mode
;
20277 mode1
= insn_data
[icode
].operand
[2].mode
;
20278 mode2
= insn_data
[icode
].operand
[3].mode
;
20280 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
20281 op0
= copy_to_mode_reg (mode0
, op0
);
20282 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
20283 op1
= copy_to_mode_reg (mode1
, op1
);
20284 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
20285 op2
= copy_to_mode_reg (mode2
, op2
);
20287 || GET_MODE (target
) != tmode
20288 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
20289 target
= gen_reg_rtx (tmode
);
20290 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
20296 case ARM_BUILTIN_WZERO
:
20297 target
= gen_reg_rtx (DImode
);
20298 emit_insn (gen_iwmmxt_clrdi (target
));
20301 case ARM_BUILTIN_THREAD_POINTER
:
20302 return arm_load_tp (target
);
20308 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
20309 if (d
->code
== (const enum arm_builtins
) fcode
)
20310 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
20312 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
20313 if (d
->code
== (const enum arm_builtins
) fcode
)
20314 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
20316 /* @@@ Should really do something sensible here. */
/* Return the number (counting from 0) of
   the least significant set bit in MASK.  */
inline static int
number_of_first_bit_set (unsigned mask)
{
  return ctz_hwi (mask);
}
20329 /* Like emit_multi_reg_push, but allowing for a different set of
20330 registers to be described as saved. MASK is the set of registers
20331 to be saved; REAL_REGS is the set of registers to be described as
20332 saved. If REAL_REGS is 0, only describe the stack adjustment. */
20335 thumb1_emit_multi_reg_push (unsigned long mask
, unsigned long real_regs
)
20337 unsigned long regno
;
20338 rtx par
[10], tmp
, reg
, insn
;
20341 /* Build the parallel of the registers actually being stored. */
20342 for (i
= 0; mask
; ++i
, mask
&= mask
- 1)
20344 regno
= ctz_hwi (mask
);
20345 reg
= gen_rtx_REG (SImode
, regno
);
20348 tmp
= gen_rtx_UNSPEC (BLKmode
, gen_rtvec (1, reg
), UNSPEC_PUSH_MULT
);
20350 tmp
= gen_rtx_USE (VOIDmode
, reg
);
20355 tmp
= plus_constant (stack_pointer_rtx
, -4 * i
);
20356 tmp
= gen_rtx_PRE_MODIFY (Pmode
, stack_pointer_rtx
, tmp
);
20357 tmp
= gen_frame_mem (BLKmode
, tmp
);
20358 tmp
= gen_rtx_SET (VOIDmode
, tmp
, par
[0]);
20361 tmp
= gen_rtx_PARALLEL (VOIDmode
, gen_rtvec_v (i
, par
));
20362 insn
= emit_insn (tmp
);
20364 /* Always build the stack adjustment note for unwind info. */
20365 tmp
= plus_constant (stack_pointer_rtx
, -4 * i
);
20366 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
, tmp
);
20369 /* Build the parallel of the registers recorded as saved for unwind. */
20370 for (j
= 0; real_regs
; ++j
, real_regs
&= real_regs
- 1)
20372 regno
= ctz_hwi (real_regs
);
20373 reg
= gen_rtx_REG (SImode
, regno
);
20375 tmp
= plus_constant (stack_pointer_rtx
, j
* 4);
20376 tmp
= gen_frame_mem (SImode
, tmp
);
20377 tmp
= gen_rtx_SET (VOIDmode
, tmp
, reg
);
20378 RTX_FRAME_RELATED_P (tmp
) = 1;
20386 RTX_FRAME_RELATED_P (par
[0]) = 1;
20387 tmp
= gen_rtx_SEQUENCE (VOIDmode
, gen_rtvec_v (j
+ 1, par
));
20390 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, tmp
);
20395 /* Emit code to push or pop registers to or from the stack. F is the
20396 assembly file. MASK is the registers to pop. */
20398 thumb_pop (FILE *f
, unsigned long mask
)
20401 int lo_mask
= mask
& 0xFF;
20402 int pushed_words
= 0;
20406 if (lo_mask
== 0 && (mask
& (1 << PC_REGNUM
)))
20408 /* Special case. Do not generate a POP PC statement here, do it in
20410 thumb_exit (f
, -1);
20414 fprintf (f
, "\tpop\t{");
20416 /* Look at the low registers first. */
20417 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
20421 asm_fprintf (f
, "%r", regno
);
20423 if ((lo_mask
& ~1) != 0)
20430 if (mask
& (1 << PC_REGNUM
))
20432 /* Catch popping the PC. */
20433 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
20434 || crtl
->calls_eh_return
)
20436 /* The PC is never poped directly, instead
20437 it is popped into r3 and then BX is used. */
20438 fprintf (f
, "}\n");
20440 thumb_exit (f
, -1);
20449 asm_fprintf (f
, "%r", PC_REGNUM
);
20453 fprintf (f
, "}\n");
20456 /* Generate code to return from a thumb function.
20457 If 'reg_containing_return_addr' is -1, then the return address is
20458 actually on the stack, at the stack pointer. */
20460 thumb_exit (FILE *f
, int reg_containing_return_addr
)
20462 unsigned regs_available_for_popping
;
20463 unsigned regs_to_pop
;
20465 unsigned available
;
20469 int restore_a4
= FALSE
;
20471 /* Compute the registers we need to pop. */
20475 if (reg_containing_return_addr
== -1)
20477 regs_to_pop
|= 1 << LR_REGNUM
;
20481 if (TARGET_BACKTRACE
)
20483 /* Restore the (ARM) frame pointer and stack pointer. */
20484 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
20488 /* If there is nothing to pop then just emit the BX instruction and
20490 if (pops_needed
== 0)
20492 if (crtl
->calls_eh_return
)
20493 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20495 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20498 /* Otherwise if we are not supporting interworking and we have not created
20499 a backtrace structure and the function was not entered in ARM mode then
20500 just pop the return address straight into the PC. */
20501 else if (!TARGET_INTERWORK
20502 && !TARGET_BACKTRACE
20503 && !is_called_in_ARM_mode (current_function_decl
)
20504 && !crtl
->calls_eh_return
)
20506 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
20510 /* Find out how many of the (return) argument registers we can corrupt. */
20511 regs_available_for_popping
= 0;
20513 /* If returning via __builtin_eh_return, the bottom three registers
20514 all contain information needed for the return. */
20515 if (crtl
->calls_eh_return
)
20519 /* If we can deduce the registers used from the function's
20520 return value. This is more reliable that examining
20521 df_regs_ever_live_p () because that will be set if the register is
20522 ever used in the function, not just if the register is used
20523 to hold a return value. */
20525 if (crtl
->return_rtx
!= 0)
20526 mode
= GET_MODE (crtl
->return_rtx
);
20528 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
20530 size
= GET_MODE_SIZE (mode
);
20534 /* In a void function we can use any argument register.
20535 In a function that returns a structure on the stack
20536 we can use the second and third argument registers. */
20537 if (mode
== VOIDmode
)
20538 regs_available_for_popping
=
20539 (1 << ARG_REGISTER (1))
20540 | (1 << ARG_REGISTER (2))
20541 | (1 << ARG_REGISTER (3));
20543 regs_available_for_popping
=
20544 (1 << ARG_REGISTER (2))
20545 | (1 << ARG_REGISTER (3));
20547 else if (size
<= 4)
20548 regs_available_for_popping
=
20549 (1 << ARG_REGISTER (2))
20550 | (1 << ARG_REGISTER (3));
20551 else if (size
<= 8)
20552 regs_available_for_popping
=
20553 (1 << ARG_REGISTER (3));
20556 /* Match registers to be popped with registers into which we pop them. */
20557 for (available
= regs_available_for_popping
,
20558 required
= regs_to_pop
;
20559 required
!= 0 && available
!= 0;
20560 available
&= ~(available
& - available
),
20561 required
&= ~(required
& - required
))
20564 /* If we have any popping registers left over, remove them. */
20566 regs_available_for_popping
&= ~available
;
20568 /* Otherwise if we need another popping register we can use
20569 the fourth argument register. */
20570 else if (pops_needed
)
20572 /* If we have not found any free argument registers and
20573 reg a4 contains the return address, we must move it. */
20574 if (regs_available_for_popping
== 0
20575 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
20577 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20578 reg_containing_return_addr
= LR_REGNUM
;
20580 else if (size
> 12)
20582 /* Register a4 is being used to hold part of the return value,
20583 but we have dire need of a free, low register. */
20586 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
20589 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
20591 /* The fourth argument register is available. */
20592 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
20598 /* Pop as many registers as we can. */
20599 thumb_pop (f
, regs_available_for_popping
);
20601 /* Process the registers we popped. */
20602 if (reg_containing_return_addr
== -1)
20604 /* The return address was popped into the lowest numbered register. */
20605 regs_to_pop
&= ~(1 << LR_REGNUM
);
20607 reg_containing_return_addr
=
20608 number_of_first_bit_set (regs_available_for_popping
);
20610 /* Remove this register for the mask of available registers, so that
20611 the return address will not be corrupted by further pops. */
20612 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
20615 /* If we popped other registers then handle them here. */
20616 if (regs_available_for_popping
)
20620 /* Work out which register currently contains the frame pointer. */
20621 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20623 /* Move it into the correct place. */
20624 asm_fprintf (f
, "\tmov\t%r, %r\n",
20625 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
20627 /* (Temporarily) remove it from the mask of popped registers. */
20628 regs_available_for_popping
&= ~(1 << frame_pointer
);
20629 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
20631 if (regs_available_for_popping
)
20635 /* We popped the stack pointer as well,
20636 find the register that contains it. */
20637 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20639 /* Move it into the stack register. */
20640 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
20642 /* At this point we have popped all necessary registers, so
20643 do not worry about restoring regs_available_for_popping
20644 to its correct value:
20646 assert (pops_needed == 0)
20647 assert (regs_available_for_popping == (1 << frame_pointer))
20648 assert (regs_to_pop == (1 << STACK_POINTER)) */
20652 /* Since we have just move the popped value into the frame
20653 pointer, the popping register is available for reuse, and
20654 we know that we still have the stack pointer left to pop. */
20655 regs_available_for_popping
|= (1 << frame_pointer
);
20659 /* If we still have registers left on the stack, but we no longer have
20660 any registers into which we can pop them, then we must move the return
20661 address into the link register and make available the register that
20663 if (regs_available_for_popping
== 0 && pops_needed
> 0)
20665 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
20667 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
20668 reg_containing_return_addr
);
20670 reg_containing_return_addr
= LR_REGNUM
;
20673 /* If we have registers left on the stack then pop some more.
20674 We know that at most we will want to pop FP and SP. */
20675 if (pops_needed
> 0)
20680 thumb_pop (f
, regs_available_for_popping
);
20682 /* We have popped either FP or SP.
20683 Move whichever one it is into the correct register. */
20684 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20685 move_to
= number_of_first_bit_set (regs_to_pop
);
20687 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
20689 regs_to_pop
&= ~(1 << move_to
);
20694 /* If we still have not popped everything then we must have only
20695 had one register available to us and we are now popping the SP. */
20696 if (pops_needed
> 0)
20700 thumb_pop (f
, regs_available_for_popping
);
20702 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20704 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
20706 assert (regs_to_pop == (1 << STACK_POINTER))
20707 assert (pops_needed == 1)
20711 /* If necessary restore the a4 register. */
20714 if (reg_containing_return_addr
!= LR_REGNUM
)
20716 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20717 reg_containing_return_addr
= LR_REGNUM
;
20720 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
20723 if (crtl
->calls_eh_return
)
20724 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20726 /* Return to caller. */
20727 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20730 /* Scan INSN just before assembler is output for it.
20731 For Thumb-1, we track the status of the condition codes; this
20732 information is used in the cbranchsi4_insn pattern. */
20734 thumb1_final_prescan_insn (rtx insn
)
20736 if (flag_print_asm_name
)
20737 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
20738 INSN_ADDRESSES (INSN_UID (insn
)));
20739 /* Don't overwrite the previous setter when we get to a cbranch. */
20740 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
20742 enum attr_conds conds
;
20744 if (cfun
->machine
->thumb1_cc_insn
)
20746 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
20747 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
20750 conds
= get_attr_conds (insn
);
20751 if (conds
== CONDS_SET
)
20753 rtx set
= single_set (insn
);
20754 cfun
->machine
->thumb1_cc_insn
= insn
;
20755 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
20756 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
20757 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
20758 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
20760 rtx src1
= XEXP (SET_SRC (set
), 1);
20761 if (src1
== const0_rtx
)
20762 cfun
->machine
->thumb1_cc_mode
= CCmode
;
20765 else if (conds
!= CONDS_NOCOND
)
20766 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
20771 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
20773 unsigned HOST_WIDE_INT mask
= 0xff;
20776 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
20777 if (val
== 0) /* XXX */
20780 for (i
= 0; i
< 25; i
++)
20781 if ((val
& (mask
<< i
)) == val
)
20787 /* Returns nonzero if the current function contains,
20788 or might contain a far jump. */
20790 thumb_far_jump_used_p (void)
20794 /* This test is only important for leaf functions. */
20795 /* assert (!leaf_function_p ()); */
20797 /* If we have already decided that far jumps may be used,
20798 do not bother checking again, and always return true even if
20799 it turns out that they are not being used. Once we have made
20800 the decision that far jumps are present (and that hence the link
20801 register will be pushed onto the stack) we cannot go back on it. */
20802 if (cfun
->machine
->far_jump_used
)
20805 /* If this function is not being called from the prologue/epilogue
20806 generation code then it must be being called from the
20807 INITIAL_ELIMINATION_OFFSET macro. */
20808 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
20810 /* In this case we know that we are being asked about the elimination
20811 of the arg pointer register. If that register is not being used,
20812 then there are no arguments on the stack, and we do not have to
20813 worry that a far jump might force the prologue to push the link
20814 register, changing the stack offsets. In this case we can just
20815 return false, since the presence of far jumps in the function will
20816 not affect stack offsets.
20818 If the arg pointer is live (or if it was live, but has now been
20819 eliminated and so set to dead) then we do have to test to see if
20820 the function might contain a far jump. This test can lead to some
20821 false negatives, since before reload is completed, then length of
20822 branch instructions is not known, so gcc defaults to returning their
20823 longest length, which in turn sets the far jump attribute to true.
20825 A false negative will not result in bad code being generated, but it
20826 will result in a needless push and pop of the link register. We
20827 hope that this does not occur too often.
20829 If we need doubleword stack alignment this could affect the other
20830 elimination offsets so we can't risk getting it wrong. */
20831 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
20832 cfun
->machine
->arg_pointer_live
= 1;
20833 else if (!cfun
->machine
->arg_pointer_live
)
20837 /* Check to see if the function contains a branch
20838 insn with the far jump attribute set. */
20839 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20841 if (GET_CODE (insn
) == JUMP_INSN
20842 /* Ignore tablejump patterns. */
20843 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20844 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
20845 && get_attr_far_jump (insn
) == FAR_JUMP_YES
20848 /* Record the fact that we have decided that
20849 the function does use far jumps. */
20850 cfun
->machine
->far_jump_used
= 1;
20858 /* Return nonzero if FUNC must be entered in ARM mode. */
20860 is_called_in_ARM_mode (tree func
)
20862 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
20864 /* Ignore the problem about functions whose address is taken. */
20865 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
20869 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
20875 /* Given the stack offsets and register mask in OFFSETS, decide how
20876 many additional registers to push instead of subtracting a constant
20877 from SP. For epilogues the principle is the same except we use pop.
20878 FOR_PROLOGUE indicates which we're generating. */
20880 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
20882 HOST_WIDE_INT amount
;
20883 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
20884 /* Extract a mask of the ones we can give to the Thumb's push/pop
20886 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
20887 /* Then count how many other high registers will need to be pushed. */
20888 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20889 int n_free
, reg_base
;
20891 if (!for_prologue
&& frame_pointer_needed
)
20892 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20894 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20896 /* If the stack frame size is 512 exactly, we can save one load
20897 instruction, which should make this a win even when optimizing
20899 if (!optimize_size
&& amount
!= 512)
20902 /* Can't do this if there are high registers to push. */
20903 if (high_regs_pushed
!= 0)
20906 /* Shouldn't do it in the prologue if no registers would normally
20907 be pushed at all. In the epilogue, also allow it if we'll have
20908 a pop insn for the PC. */
20911 || TARGET_BACKTRACE
20912 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
20913 || TARGET_INTERWORK
20914 || crtl
->args
.pretend_args_size
!= 0))
20917 /* Don't do this if thumb_expand_prologue wants to emit instructions
20918 between the push and the stack frame allocation. */
20920 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20921 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
20928 reg_base
= arm_size_return_regs () / UNITS_PER_WORD
;
20929 live_regs_mask
>>= reg_base
;
20932 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
20933 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
20935 live_regs_mask
>>= 1;
20941 gcc_assert (amount
/ 4 * 4 == amount
);
20943 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
20944 return (amount
- 508) / 4;
20945 if (amount
<= n_free
* 4)
20950 /* The bits which aren't usefully expanded as rtl. */
20952 thumb_unexpanded_epilogue (void)
20954 arm_stack_offsets
*offsets
;
20956 unsigned long live_regs_mask
= 0;
20957 int high_regs_pushed
= 0;
20959 int had_to_push_lr
;
20962 if (cfun
->machine
->return_used_this_function
!= 0)
20965 if (IS_NAKED (arm_current_func_type ()))
20968 offsets
= arm_get_frame_offsets ();
20969 live_regs_mask
= offsets
->saved_regs_mask
;
20970 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20972 /* If we can deduce the registers used from the function's return value.
20973 This is more reliable that examining df_regs_ever_live_p () because that
20974 will be set if the register is ever used in the function, not just if
20975 the register is used to hold a return value. */
20976 size
= arm_size_return_regs ();
20978 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
20981 unsigned long extra_mask
= (1 << extra_pop
) - 1;
20982 live_regs_mask
|= extra_mask
<< (size
/ UNITS_PER_WORD
);
20985 /* The prolog may have pushed some high registers to use as
20986 work registers. e.g. the testsuite file:
20987 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20988 compiles to produce:
20989 push {r4, r5, r6, r7, lr}
20993 as part of the prolog. We have to undo that pushing here. */
20995 if (high_regs_pushed
)
20997 unsigned long mask
= live_regs_mask
& 0xff;
21000 /* The available low registers depend on the size of the value we are
21008 /* Oh dear! We have no low registers into which we can pop
21011 ("no low registers available for popping high registers");
21013 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
21014 if (live_regs_mask
& (1 << next_hi_reg
))
21017 while (high_regs_pushed
)
21019 /* Find lo register(s) into which the high register(s) can
21021 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
21023 if (mask
& (1 << regno
))
21024 high_regs_pushed
--;
21025 if (high_regs_pushed
== 0)
21029 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
21031 /* Pop the values into the low register(s). */
21032 thumb_pop (asm_out_file
, mask
);
21034 /* Move the value(s) into the high registers. */
21035 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
21037 if (mask
& (1 << regno
))
21039 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
21042 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
21043 if (live_regs_mask
& (1 << next_hi_reg
))
21048 live_regs_mask
&= ~0x0f00;
21051 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
21052 live_regs_mask
&= 0xff;
21054 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
21056 /* Pop the return address into the PC. */
21057 if (had_to_push_lr
)
21058 live_regs_mask
|= 1 << PC_REGNUM
;
21060 /* Either no argument registers were pushed or a backtrace
21061 structure was created which includes an adjusted stack
21062 pointer, so just pop everything. */
21063 if (live_regs_mask
)
21064 thumb_pop (asm_out_file
, live_regs_mask
);
21066 /* We have either just popped the return address into the
21067 PC or it is was kept in LR for the entire function.
21068 Note that thumb_pop has already called thumb_exit if the
21069 PC was in the list. */
21070 if (!had_to_push_lr
)
21071 thumb_exit (asm_out_file
, LR_REGNUM
);
21075 /* Pop everything but the return address. */
21076 if (live_regs_mask
)
21077 thumb_pop (asm_out_file
, live_regs_mask
);
21079 if (had_to_push_lr
)
21083 /* We have no free low regs, so save one. */
21084 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
21088 /* Get the return address into a temporary register. */
21089 thumb_pop (asm_out_file
, 1 << LAST_ARG_REGNUM
);
21093 /* Move the return address to lr. */
21094 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
21096 /* Restore the low register. */
21097 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
21102 regno
= LAST_ARG_REGNUM
;
21107 /* Remove the argument registers that were pushed onto the stack. */
21108 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
21109 SP_REGNUM
, SP_REGNUM
,
21110 crtl
->args
.pretend_args_size
);
21112 thumb_exit (asm_out_file
, regno
);
21118 /* Functions to save and restore machine-specific function data. */
21119 static struct machine_function
*
21120 arm_init_machine_status (void)
21122 struct machine_function
*machine
;
21123 machine
= ggc_alloc_cleared_machine_function ();
21125 #if ARM_FT_UNKNOWN != 0
21126 machine
->func_type
= ARM_FT_UNKNOWN
;
21131 /* Return an RTX indicating where the return address to the
21132 calling function can be found. */
21134 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
21139 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
21142 /* Do anything needed before RTL is emitted for each function. */
21144 arm_init_expanders (void)
21146 /* Arrange to initialize and mark the machine per-function status. */
21147 init_machine_status
= arm_init_machine_status
;
21149 /* This is to stop the combine pass optimizing away the alignment
21150 adjustment of va_arg. */
21151 /* ??? It is claimed that this should not be necessary. */
21153 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
21157 /* Like arm_compute_initial_elimination offset. Simpler because there
21158 isn't an ABI specified frame pointer for Thumb. Instead, we set it
21159 to point at the base of the local variables after static stack
21160 space for a function has been allocated. */
21163 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
21165 arm_stack_offsets
*offsets
;
21167 offsets
= arm_get_frame_offsets ();
21171 case ARG_POINTER_REGNUM
:
21174 case STACK_POINTER_REGNUM
:
21175 return offsets
->outgoing_args
- offsets
->saved_args
;
21177 case FRAME_POINTER_REGNUM
:
21178 return offsets
->soft_frame
- offsets
->saved_args
;
21180 case ARM_HARD_FRAME_POINTER_REGNUM
:
21181 return offsets
->saved_regs
- offsets
->saved_args
;
21183 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21184 return offsets
->locals_base
- offsets
->saved_args
;
21187 gcc_unreachable ();
21191 case FRAME_POINTER_REGNUM
:
21194 case STACK_POINTER_REGNUM
:
21195 return offsets
->outgoing_args
- offsets
->soft_frame
;
21197 case ARM_HARD_FRAME_POINTER_REGNUM
:
21198 return offsets
->saved_regs
- offsets
->soft_frame
;
21200 case THUMB_HARD_FRAME_POINTER_REGNUM
:
21201 return offsets
->locals_base
- offsets
->soft_frame
;
21204 gcc_unreachable ();
21209 gcc_unreachable ();
21213 /* Generate the function's prologue. */
21216 thumb1_expand_prologue (void)
21220 HOST_WIDE_INT amount
;
21221 arm_stack_offsets
*offsets
;
21222 unsigned long func_type
;
21224 unsigned long live_regs_mask
;
21225 unsigned long l_mask
;
21226 unsigned high_regs_pushed
= 0;
21228 func_type
= arm_current_func_type ();
21230 /* Naked functions don't have prologues. */
21231 if (IS_NAKED (func_type
))
21234 if (IS_INTERRUPT (func_type
))
21236 error ("interrupt Service Routines cannot be coded in Thumb mode");
21240 if (is_called_in_ARM_mode (current_function_decl
))
21241 emit_insn (gen_prologue_thumb1_interwork ());
21243 offsets
= arm_get_frame_offsets ();
21244 live_regs_mask
= offsets
->saved_regs_mask
;
21246 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
21247 l_mask
= live_regs_mask
& 0x40ff;
21248 /* Then count how many other high registers will need to be pushed. */
21249 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
21251 if (crtl
->args
.pretend_args_size
)
21253 rtx x
= GEN_INT (-crtl
->args
.pretend_args_size
);
21255 if (cfun
->machine
->uses_anonymous_args
)
21257 int num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
21258 unsigned long mask
;
21260 mask
= 1ul << (LAST_ARG_REGNUM
+ 1);
21261 mask
-= 1ul << (LAST_ARG_REGNUM
+ 1 - num_pushes
);
21263 insn
= thumb1_emit_multi_reg_push (mask
, 0);
21267 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
21268 stack_pointer_rtx
, x
));
21270 RTX_FRAME_RELATED_P (insn
) = 1;
21273 if (TARGET_BACKTRACE
)
21275 HOST_WIDE_INT offset
= 0;
21276 unsigned work_register
;
21277 rtx work_reg
, x
, arm_hfp_rtx
;
21279 /* We have been asked to create a stack backtrace structure.
21280 The code looks like this:
21284 0 sub SP, #16 Reserve space for 4 registers.
21285 2 push {R7} Push low registers.
21286 4 add R7, SP, #20 Get the stack pointer before the push.
21287 6 str R7, [SP, #8] Store the stack pointer
21288 (before reserving the space).
21289 8 mov R7, PC Get hold of the start of this code + 12.
21290 10 str R7, [SP, #16] Store it.
21291 12 mov R7, FP Get hold of the current frame pointer.
21292 14 str R7, [SP, #4] Store it.
21293 16 mov R7, LR Get hold of the current return address.
21294 18 str R7, [SP, #12] Store it.
21295 20 add R7, SP, #16 Point at the start of the
21296 backtrace structure.
21297 22 mov FP, R7 Put this value into the frame pointer. */
21299 work_register
= thumb_find_work_register (live_regs_mask
);
21300 work_reg
= gen_rtx_REG (SImode
, work_register
);
21301 arm_hfp_rtx
= gen_rtx_REG (SImode
, ARM_HARD_FRAME_POINTER_REGNUM
);
21303 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
21304 stack_pointer_rtx
, GEN_INT (-16)));
21305 RTX_FRAME_RELATED_P (insn
) = 1;
21309 insn
= thumb1_emit_multi_reg_push (l_mask
, l_mask
);
21310 RTX_FRAME_RELATED_P (insn
) = 1;
21312 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
21315 x
= GEN_INT (offset
+ 16 + crtl
->args
.pretend_args_size
);
21316 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
21318 x
= plus_constant (stack_pointer_rtx
, offset
+ 4);
21319 x
= gen_frame_mem (SImode
, x
);
21320 emit_move_insn (x
, work_reg
);
21322 /* Make sure that the instruction fetching the PC is in the right place
21323 to calculate "start of backtrace creation code + 12". */
21324 /* ??? The stores using the common WORK_REG ought to be enough to
21325 prevent the scheduler from doing anything weird. Failing that
21326 we could always move all of the following into an UNSPEC_VOLATILE. */
21329 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
21330 emit_move_insn (work_reg
, x
);
21332 x
= plus_constant (stack_pointer_rtx
, offset
+ 12);
21333 x
= gen_frame_mem (SImode
, x
);
21334 emit_move_insn (x
, work_reg
);
21336 emit_move_insn (work_reg
, arm_hfp_rtx
);
21338 x
= plus_constant (stack_pointer_rtx
, offset
);
21339 x
= gen_frame_mem (SImode
, x
);
21340 emit_move_insn (x
, work_reg
);
21344 emit_move_insn (work_reg
, arm_hfp_rtx
);
21346 x
= plus_constant (stack_pointer_rtx
, offset
);
21347 x
= gen_frame_mem (SImode
, x
);
21348 emit_move_insn (x
, work_reg
);
21350 x
= gen_rtx_REG (SImode
, PC_REGNUM
);
21351 emit_move_insn (work_reg
, x
);
21353 x
= plus_constant (stack_pointer_rtx
, offset
+ 12);
21354 x
= gen_frame_mem (SImode
, x
);
21355 emit_move_insn (x
, work_reg
);
21358 x
= gen_rtx_REG (SImode
, LR_REGNUM
);
21359 emit_move_insn (work_reg
, x
);
21361 x
= plus_constant (stack_pointer_rtx
, offset
+ 8);
21362 x
= gen_frame_mem (SImode
, x
);
21363 emit_move_insn (x
, work_reg
);
21365 x
= GEN_INT (offset
+ 12);
21366 emit_insn (gen_addsi3 (work_reg
, stack_pointer_rtx
, x
));
21368 emit_move_insn (arm_hfp_rtx
, work_reg
);
21370 /* Optimization: If we are not pushing any low registers but we are going
21371 to push some high registers then delay our first push. This will just
21372 be a push of LR and we can combine it with the push of the first high
21374 else if ((l_mask
& 0xff) != 0
21375 || (high_regs_pushed
== 0 && l_mask
))
21377 unsigned long mask
= l_mask
;
21378 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
21379 insn
= thumb1_emit_multi_reg_push (mask
, mask
);
21380 RTX_FRAME_RELATED_P (insn
) = 1;
21383 if (high_regs_pushed
)
21385 unsigned pushable_regs
;
21386 unsigned next_hi_reg
;
21388 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
21389 if (live_regs_mask
& (1 << next_hi_reg
))
21392 pushable_regs
= l_mask
& 0xff;
21394 if (pushable_regs
== 0)
21395 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
21397 while (high_regs_pushed
> 0)
21399 unsigned long real_regs_mask
= 0;
21401 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
21403 if (pushable_regs
& (1 << regno
))
21405 emit_move_insn (gen_rtx_REG (SImode
, regno
),
21406 gen_rtx_REG (SImode
, next_hi_reg
));
21408 high_regs_pushed
--;
21409 real_regs_mask
|= (1 << next_hi_reg
);
21411 if (high_regs_pushed
)
21413 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
21415 if (live_regs_mask
& (1 << next_hi_reg
))
21420 pushable_regs
&= ~((1 << regno
) - 1);
21426 /* If we had to find a work register and we have not yet
21427 saved the LR then add it to the list of regs to push. */
21428 if (l_mask
== (1 << LR_REGNUM
))
21430 pushable_regs
|= l_mask
;
21431 real_regs_mask
|= l_mask
;
21435 insn
= thumb1_emit_multi_reg_push (pushable_regs
, real_regs_mask
);
21436 RTX_FRAME_RELATED_P (insn
) = 1;
21440 /* Load the pic register before setting the frame pointer,
21441 so we can use r7 as a temporary work register. */
21442 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
21443 arm_load_pic_register (live_regs_mask
);
21445 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
21446 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
21447 stack_pointer_rtx
);
21449 if (flag_stack_usage_info
)
21450 current_function_static_stack_size
21451 = offsets
->outgoing_args
- offsets
->saved_args
;
21453 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
21454 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
21459 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21460 GEN_INT (- amount
)));
21461 RTX_FRAME_RELATED_P (insn
) = 1;
21467 /* The stack decrement is too big for an immediate value in a single
21468 insn. In theory we could issue multiple subtracts, but after
21469 three of them it becomes more space efficient to place the full
21470 value in the constant pool and load into a register. (Also the
21471 ARM debugger really likes to see only one stack decrement per
21472 function). So instead we look for a scratch register into which
21473 we can load the decrement, and then we subtract this from the
21474 stack pointer. Unfortunately on the thumb the only available
21475 scratch registers are the argument registers, and we cannot use
21476 these as they may hold arguments to the function. Instead we
21477 attempt to locate a call preserved register which is used by this
21478 function. If we can find one, then we know that it will have
21479 been pushed at the start of the prologue and so we can corrupt
21481 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
21482 if (live_regs_mask
& (1 << regno
))
21485 gcc_assert(regno
<= LAST_LO_REGNUM
);
21487 reg
= gen_rtx_REG (SImode
, regno
);
21489 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
21491 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
21492 stack_pointer_rtx
, reg
));
21494 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
21495 plus_constant (stack_pointer_rtx
,
21497 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
21498 RTX_FRAME_RELATED_P (insn
) = 1;
21502 if (frame_pointer_needed
)
21503 thumb_set_frame_pointer (offsets
);
21505 /* If we are profiling, make sure no instructions are scheduled before
21506 the call to mcount. Similarly if the user has requested no
21507 scheduling in the prolog. Similarly if we want non-call exceptions
21508 using the EABI unwinder, to prevent faulting instructions from being
21509 swapped with a stack adjustment. */
21510 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
21511 || (arm_except_unwind_info (&global_options
) == UI_TARGET
21512 && cfun
->can_throw_non_call_exceptions
))
21513 emit_insn (gen_blockage ());
21515 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
21516 if (live_regs_mask
& 0xff)
21517 cfun
->machine
->lr_save_eliminated
= 0;
21522 thumb1_expand_epilogue (void)
21524 HOST_WIDE_INT amount
;
21525 arm_stack_offsets
*offsets
;
21528 /* Naked functions don't have prologues. */
21529 if (IS_NAKED (arm_current_func_type ()))
21532 offsets
= arm_get_frame_offsets ();
21533 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
21535 if (frame_pointer_needed
)
21537 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
21538 amount
= offsets
->locals_base
- offsets
->saved_regs
;
21540 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
21542 gcc_assert (amount
>= 0);
21546 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
21547 GEN_INT (amount
)));
21550 /* r3 is always free in the epilogue. */
21551 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
21553 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
21554 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
21558 /* Emit a USE (stack_pointer_rtx), so that
21559 the stack adjustment will not be deleted. */
21560 emit_insn (gen_prologue_use (stack_pointer_rtx
));
21562 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
21563 emit_insn (gen_blockage ());
21565 /* Emit a clobber for each insn that will be restored in the epilogue,
21566 so that flow2 will get register lifetimes correct. */
21567 for (regno
= 0; regno
< 13; regno
++)
21568 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
21569 emit_clobber (gen_rtx_REG (SImode
, regno
));
21571 if (! df_regs_ever_live_p (LR_REGNUM
))
21572 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
21575 /* Implementation of insn prologue_thumb1_interwork. This is the first
21576 "instruction" of a function called in ARM mode. Swap to thumb mode. */
21579 thumb1_output_interwork (void)
21582 FILE *f
= asm_out_file
;
21584 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
21585 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
21587 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
21589 /* Generate code sequence to switch us into Thumb mode. */
21590 /* The .code 32 directive has already been emitted by
21591 ASM_DECLARE_FUNCTION_NAME. */
21592 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
21593 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
21595 /* Generate a label, so that the debugger will notice the
21596 change in instruction sets. This label is also used by
21597 the assembler to bypass the ARM code when this function
21598 is called from a Thumb encoded function elsewhere in the
21599 same file. Hence the definition of STUB_NAME here must
21600 agree with the definition in gas/config/tc-arm.c. */
21602 #define STUB_NAME ".real_start_of"
21604 fprintf (f
, "\t.code\t16\n");
21606 if (arm_dllexport_name_p (name
))
21607 name
= arm_strip_name_encoding (name
);
21609 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
21610 fprintf (f
, "\t.thumb_func\n");
21611 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
21616 /* Handle the case of a double word load into a low register from
21617 a computed memory address. The computed address may involve a
21618 register which is overwritten by the load. */
21620 thumb_load_double_from_address (rtx
*operands
)
21628 gcc_assert (GET_CODE (operands
[0]) == REG
);
21629 gcc_assert (GET_CODE (operands
[1]) == MEM
);
21631 /* Get the memory address. */
21632 addr
= XEXP (operands
[1], 0);
21634 /* Work out how the memory address is computed. */
21635 switch (GET_CODE (addr
))
21638 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21640 if (REGNO (operands
[0]) == REGNO (addr
))
21642 output_asm_insn ("ldr\t%H0, %2", operands
);
21643 output_asm_insn ("ldr\t%0, %1", operands
);
21647 output_asm_insn ("ldr\t%0, %1", operands
);
21648 output_asm_insn ("ldr\t%H0, %2", operands
);
21653 /* Compute <address> + 4 for the high order load. */
21654 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21656 output_asm_insn ("ldr\t%0, %1", operands
);
21657 output_asm_insn ("ldr\t%H0, %2", operands
);
21661 arg1
= XEXP (addr
, 0);
21662 arg2
= XEXP (addr
, 1);
21664 if (CONSTANT_P (arg1
))
21665 base
= arg2
, offset
= arg1
;
21667 base
= arg1
, offset
= arg2
;
21669 gcc_assert (GET_CODE (base
) == REG
);
21671 /* Catch the case of <address> = <reg> + <reg> */
21672 if (GET_CODE (offset
) == REG
)
21674 int reg_offset
= REGNO (offset
);
21675 int reg_base
= REGNO (base
);
21676 int reg_dest
= REGNO (operands
[0]);
21678 /* Add the base and offset registers together into the
21679 higher destination register. */
21680 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
21681 reg_dest
+ 1, reg_base
, reg_offset
);
21683 /* Load the lower destination register from the address in
21684 the higher destination register. */
21685 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
21686 reg_dest
, reg_dest
+ 1);
21688 /* Load the higher destination register from its own address
21690 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
21691 reg_dest
+ 1, reg_dest
+ 1);
21695 /* Compute <address> + 4 for the high order load. */
21696 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21698 /* If the computed address is held in the low order register
21699 then load the high order register first, otherwise always
21700 load the low order register first. */
21701 if (REGNO (operands
[0]) == REGNO (base
))
21703 output_asm_insn ("ldr\t%H0, %2", operands
);
21704 output_asm_insn ("ldr\t%0, %1", operands
);
21708 output_asm_insn ("ldr\t%0, %1", operands
);
21709 output_asm_insn ("ldr\t%H0, %2", operands
);
21715 /* With no registers to worry about we can just load the value
21717 operands
[2] = adjust_address (operands
[1], SImode
, 4);
21719 output_asm_insn ("ldr\t%H0, %2", operands
);
21720 output_asm_insn ("ldr\t%0, %1", operands
);
21724 gcc_unreachable ();
21731 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
21738 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21741 operands
[4] = operands
[5];
21744 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
21745 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
21749 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21752 operands
[4] = operands
[5];
21755 if (REGNO (operands
[5]) > REGNO (operands
[6]))
21758 operands
[5] = operands
[6];
21761 if (REGNO (operands
[4]) > REGNO (operands
[5]))
21764 operands
[4] = operands
[5];
21768 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
21769 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
21773 gcc_unreachable ();
21779 /* Output a call-via instruction for thumb state. */
21781 thumb_call_via_reg (rtx reg
)
21783 int regno
= REGNO (reg
);
21786 gcc_assert (regno
< LR_REGNUM
);
21788 /* If we are in the normal text section we can use a single instance
21789 per compilation unit. If we are doing function sections, then we need
21790 an entry per section, since we can't rely on reachability. */
21791 if (in_section
== text_section
)
21793 thumb_call_reg_needed
= 1;
21795 if (thumb_call_via_label
[regno
] == NULL
)
21796 thumb_call_via_label
[regno
] = gen_label_rtx ();
21797 labelp
= thumb_call_via_label
+ regno
;
21801 if (cfun
->machine
->call_via
[regno
] == NULL
)
21802 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
21803 labelp
= cfun
->machine
->call_via
+ regno
;
21806 output_asm_insn ("bl\t%a0", labelp
);
21810 /* Routines for generating rtl. */
21812 thumb_expand_movmemqi (rtx
*operands
)
21814 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
21815 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
21816 HOST_WIDE_INT len
= INTVAL (operands
[2]);
21817 HOST_WIDE_INT offset
= 0;
21821 emit_insn (gen_movmem12b (out
, in
, out
, in
));
21827 emit_insn (gen_movmem8b (out
, in
, out
, in
));
21833 rtx reg
= gen_reg_rtx (SImode
);
21834 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
21835 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
21842 rtx reg
= gen_reg_rtx (HImode
);
21843 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
21844 plus_constant (in
, offset
))));
21845 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (out
, offset
)),
21853 rtx reg
= gen_reg_rtx (QImode
);
21854 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
21855 plus_constant (in
, offset
))));
21856 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (out
, offset
)),
21862 thumb_reload_out_hi (rtx
*operands
)
21864 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
21867 /* Handle reading a half-word from memory during reload. */
21869 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
21871 gcc_unreachable ();
21874 /* Return the length of a function name prefix
21875 that starts with the character 'c'. */
21877 arm_get_strip_length (int c
)
21881 ARM_NAME_ENCODING_LENGTHS
21886 /* Return a pointer to a function's name with any
21887 and all prefix encodings stripped from it. */
21889 arm_strip_name_encoding (const char *name
)
21893 while ((skip
= arm_get_strip_length (* name
)))
21899 /* If there is a '*' anywhere in the name's prefix, then
21900 emit the stripped name verbatim, otherwise prepend an
21901 underscore if leading underscores are being used. */
21903 arm_asm_output_labelref (FILE *stream
, const char *name
)
21908 while ((skip
= arm_get_strip_length (* name
)))
21910 verbatim
|= (*name
== '*');
21915 fputs (name
, stream
);
21917 asm_fprintf (stream
, "%U%s", name
);
21921 arm_file_start (void)
21925 if (TARGET_UNIFIED_ASM
)
21926 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
21930 const char *fpu_name
;
21931 if (arm_selected_arch
)
21932 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_selected_arch
->name
);
21934 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_selected_cpu
->name
);
21936 if (TARGET_SOFT_FLOAT
)
21939 fpu_name
= "softvfp";
21941 fpu_name
= "softfpa";
21945 fpu_name
= arm_fpu_desc
->name
;
21946 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
21948 if (TARGET_HARD_FLOAT
)
21949 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
21950 if (TARGET_HARD_FLOAT_ABI
)
21951 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
21954 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
21956 /* Some of these attributes only apply when the corresponding features
21957 are used. However we don't have any easy way of figuring this out.
21958 Conservatively record the setting that would have been used. */
21960 /* Tag_ABI_FP_rounding. */
21961 if (flag_rounding_math
)
21962 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
21963 if (!flag_unsafe_math_optimizations
)
21965 /* Tag_ABI_FP_denormal. */
21966 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
21967 /* Tag_ABI_FP_exceptions. */
21968 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
21970 /* Tag_ABI_FP_user_exceptions. */
21971 if (flag_signaling_nans
)
21972 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
21973 /* Tag_ABI_FP_number_model. */
21974 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
21975 flag_finite_math_only
? 1 : 3);
21977 /* Tag_ABI_align8_needed. */
21978 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
21979 /* Tag_ABI_align8_preserved. */
21980 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
21981 /* Tag_ABI_enum_size. */
21982 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
21983 flag_short_enums
? 1 : 2);
21985 /* Tag_ABI_optimization_goals. */
21988 else if (optimize
>= 2)
21994 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
21996 /* Tag_ABI_FP_16bit_format. */
21997 if (arm_fp16_format
)
21998 asm_fprintf (asm_out_file
, "\t.eabi_attribute 38, %d\n",
21999 (int)arm_fp16_format
);
22001 if (arm_lang_output_object_attributes_hook
)
22002 arm_lang_output_object_attributes_hook();
22004 default_file_start();
22008 arm_file_end (void)
22012 if (NEED_INDICATE_EXEC_STACK
)
22013 /* Add .note.GNU-stack. */
22014 file_end_indicate_exec_stack ();
22016 if (! thumb_call_reg_needed
)
22019 switch_to_section (text_section
);
22020 asm_fprintf (asm_out_file
, "\t.code 16\n");
22021 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
22023 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
22025 rtx label
= thumb_call_via_label
[regno
];
22029 targetm
.asm_out
.internal_label (asm_out_file
, "L",
22030 CODE_LABEL_NUMBER (label
));
22031 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
22037 /* Symbols in the text segment can be accessed without indirecting via the
22038 constant pool; it may take an extra binary operation, but this is still
22039 faster than indirecting via memory. Don't do this when not optimizing,
22040 since we won't be calculating al of the offsets necessary to do this
22044 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
22046 if (optimize
> 0 && TREE_CONSTANT (decl
))
22047 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
22049 default_encode_section_info (decl
, rtl
, first
);
22051 #endif /* !ARM_PE */
22054 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
22056 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
22057 && !strcmp (prefix
, "L"))
22059 arm_ccfsm_state
= 0;
22060 arm_target_insn
= NULL
;
22062 default_internal_label (stream
, prefix
, labelno
);
22065 /* Output code to add DELTA to the first argument, and then jump
22066 to FUNCTION. Used for C++ multiple inheritance. */
22068 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
22069 HOST_WIDE_INT delta
,
22070 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
22073 static int thunk_label
= 0;
22076 int mi_delta
= delta
;
22077 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
22079 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
22082 mi_delta
= - mi_delta
;
22086 int labelno
= thunk_label
++;
22087 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
22088 /* Thunks are entered in arm mode when available. */
22089 if (TARGET_THUMB1_ONLY
)
22091 /* push r3 so we can use it as a temporary. */
22092 /* TODO: Omit this save if r3 is not used. */
22093 fputs ("\tpush {r3}\n", file
);
22094 fputs ("\tldr\tr3, ", file
);
22098 fputs ("\tldr\tr12, ", file
);
22100 assemble_name (file
, label
);
22101 fputc ('\n', file
);
22104 /* If we are generating PIC, the ldr instruction below loads
22105 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
22106 the address of the add + 8, so we have:
22108 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
22111 Note that we have "+ 1" because some versions of GNU ld
22112 don't set the low bit of the result for R_ARM_REL32
22113 relocations against thumb function symbols.
22114 On ARMv6M this is +4, not +8. */
22115 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
22116 assemble_name (file
, labelpc
);
22117 fputs (":\n", file
);
22118 if (TARGET_THUMB1_ONLY
)
22120 /* This is 2 insns after the start of the thunk, so we know it
22121 is 4-byte aligned. */
22122 fputs ("\tadd\tr3, pc, r3\n", file
);
22123 fputs ("\tmov r12, r3\n", file
);
22126 fputs ("\tadd\tr12, pc, r12\n", file
);
22128 else if (TARGET_THUMB1_ONLY
)
22129 fputs ("\tmov r12, r3\n", file
);
22131 if (TARGET_THUMB1_ONLY
)
22133 if (mi_delta
> 255)
22135 fputs ("\tldr\tr3, ", file
);
22136 assemble_name (file
, label
);
22137 fputs ("+4\n", file
);
22138 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
22139 mi_op
, this_regno
, this_regno
);
22141 else if (mi_delta
!= 0)
22143 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
22144 mi_op
, this_regno
, this_regno
,
22150 /* TODO: Use movw/movt for large constants when available. */
22151 while (mi_delta
!= 0)
22153 if ((mi_delta
& (3 << shift
)) == 0)
22157 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
22158 mi_op
, this_regno
, this_regno
,
22159 mi_delta
& (0xff << shift
));
22160 mi_delta
&= ~(0xff << shift
);
22167 if (TARGET_THUMB1_ONLY
)
22168 fputs ("\tpop\t{r3}\n", file
);
22170 fprintf (file
, "\tbx\tr12\n");
22171 ASM_OUTPUT_ALIGN (file
, 2);
22172 assemble_name (file
, label
);
22173 fputs (":\n", file
);
22176 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
22177 rtx tem
= XEXP (DECL_RTL (function
), 0);
22178 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
22179 tem
= gen_rtx_MINUS (GET_MODE (tem
),
22181 gen_rtx_SYMBOL_REF (Pmode
,
22182 ggc_strdup (labelpc
)));
22183 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
22186 /* Output ".word .LTHUNKn". */
22187 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
22189 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
22190 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
22194 fputs ("\tb\t", file
);
22195 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
22196 if (NEED_PLT_RELOC
)
22197 fputs ("(PLT)", file
);
22198 fputc ('\n', file
);
22203 arm_emit_vector_const (FILE *file
, rtx x
)
22206 const char * pattern
;
22208 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
22210 switch (GET_MODE (x
))
22212 case V2SImode
: pattern
= "%08x"; break;
22213 case V4HImode
: pattern
= "%04x"; break;
22214 case V8QImode
: pattern
= "%02x"; break;
22215 default: gcc_unreachable ();
22218 fprintf (file
, "0x");
22219 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
22223 element
= CONST_VECTOR_ELT (x
, i
);
22224 fprintf (file
, pattern
, INTVAL (element
));
22230 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
22231 HFmode constant pool entries are actually loaded with ldr. */
22233 arm_emit_fp16_const (rtx c
)
22238 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
22239 bits
= real_to_target (NULL
, &r
, HFmode
);
22240 if (WORDS_BIG_ENDIAN
)
22241 assemble_zeros (2);
22242 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
22243 if (!WORDS_BIG_ENDIAN
)
22244 assemble_zeros (2);
22248 arm_output_load_gr (rtx
*operands
)
22255 if (GET_CODE (operands
[1]) != MEM
22256 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
22257 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
22258 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
22259 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
22260 return "wldrw%?\t%0, %1";
22262 /* Fix up an out-of-range load of a GR register. */
22263 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
22264 wcgr
= operands
[0];
22266 output_asm_insn ("ldr%?\t%0, %1", operands
);
22268 operands
[0] = wcgr
;
22270 output_asm_insn ("tmcr%?\t%0, %1", operands
);
22271 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
22276 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
22278 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
22279 named arg and all anonymous args onto the stack.
22280 XXX I know the prologue shouldn't be pushing registers, but it is faster
22284 arm_setup_incoming_varargs (cumulative_args_t pcum_v
,
22285 enum machine_mode mode
,
22288 int second_time ATTRIBUTE_UNUSED
)
22290 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
22293 cfun
->machine
->uses_anonymous_args
= 1;
22294 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
22296 nregs
= pcum
->aapcs_ncrn
;
22297 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
22301 nregs
= pcum
->nregs
;
22303 if (nregs
< NUM_ARG_REGS
)
22304 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
22307 /* Return nonzero if the CONSUMER instruction (a store) does not need
22308 PRODUCER's value to calculate the address. */
22311 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
22313 rtx value
= PATTERN (producer
);
22314 rtx addr
= PATTERN (consumer
);
22316 if (GET_CODE (value
) == COND_EXEC
)
22317 value
= COND_EXEC_CODE (value
);
22318 if (GET_CODE (value
) == PARALLEL
)
22319 value
= XVECEXP (value
, 0, 0);
22320 value
= XEXP (value
, 0);
22321 if (GET_CODE (addr
) == COND_EXEC
)
22322 addr
= COND_EXEC_CODE (addr
);
22323 if (GET_CODE (addr
) == PARALLEL
)
22324 addr
= XVECEXP (addr
, 0, 0);
22325 addr
= XEXP (addr
, 0);
22327 return !reg_overlap_mentioned_p (value
, addr
);
22330 /* Return nonzero if the CONSUMER instruction (a store) does need
22331 PRODUCER's value to calculate the address. */
22334 arm_early_store_addr_dep (rtx producer
, rtx consumer
)
22336 return !arm_no_early_store_addr_dep (producer
, consumer
);
22339 /* Return nonzero if the CONSUMER instruction (a load) does need
22340 PRODUCER's value to calculate the address. */
22343 arm_early_load_addr_dep (rtx producer
, rtx consumer
)
22345 rtx value
= PATTERN (producer
);
22346 rtx addr
= PATTERN (consumer
);
22348 if (GET_CODE (value
) == COND_EXEC
)
22349 value
= COND_EXEC_CODE (value
);
22350 if (GET_CODE (value
) == PARALLEL
)
22351 value
= XVECEXP (value
, 0, 0);
22352 value
= XEXP (value
, 0);
22353 if (GET_CODE (addr
) == COND_EXEC
)
22354 addr
= COND_EXEC_CODE (addr
);
22355 if (GET_CODE (addr
) == PARALLEL
)
22356 addr
= XVECEXP (addr
, 0, 0);
22357 addr
= XEXP (addr
, 1);
22359 return reg_overlap_mentioned_p (value
, addr
);
22362 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22363 have an early register shift value or amount dependency on the
22364 result of PRODUCER. */
22367 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
22369 rtx value
= PATTERN (producer
);
22370 rtx op
= PATTERN (consumer
);
22373 if (GET_CODE (value
) == COND_EXEC
)
22374 value
= COND_EXEC_CODE (value
);
22375 if (GET_CODE (value
) == PARALLEL
)
22376 value
= XVECEXP (value
, 0, 0);
22377 value
= XEXP (value
, 0);
22378 if (GET_CODE (op
) == COND_EXEC
)
22379 op
= COND_EXEC_CODE (op
);
22380 if (GET_CODE (op
) == PARALLEL
)
22381 op
= XVECEXP (op
, 0, 0);
22384 early_op
= XEXP (op
, 0);
22385 /* This is either an actual independent shift, or a shift applied to
22386 the first operand of another operation. We want the whole shift
22388 if (GET_CODE (early_op
) == REG
)
22391 return !reg_overlap_mentioned_p (value
, early_op
);
22394 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
22395 have an early register shift value dependency on the result of
22399 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
22401 rtx value
= PATTERN (producer
);
22402 rtx op
= PATTERN (consumer
);
22405 if (GET_CODE (value
) == COND_EXEC
)
22406 value
= COND_EXEC_CODE (value
);
22407 if (GET_CODE (value
) == PARALLEL
)
22408 value
= XVECEXP (value
, 0, 0);
22409 value
= XEXP (value
, 0);
22410 if (GET_CODE (op
) == COND_EXEC
)
22411 op
= COND_EXEC_CODE (op
);
22412 if (GET_CODE (op
) == PARALLEL
)
22413 op
= XVECEXP (op
, 0, 0);
22416 early_op
= XEXP (op
, 0);
22418 /* This is either an actual independent shift, or a shift applied to
22419 the first operand of another operation. We want the value being
22420 shifted, in either case. */
22421 if (GET_CODE (early_op
) != REG
)
22422 early_op
= XEXP (early_op
, 0);
22424 return !reg_overlap_mentioned_p (value
, early_op
);
22427 /* Return nonzero if the CONSUMER (a mul or mac op) does not
22428 have an early register mult dependency on the result of
22432 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
22434 rtx value
= PATTERN (producer
);
22435 rtx op
= PATTERN (consumer
);
22437 if (GET_CODE (value
) == COND_EXEC
)
22438 value
= COND_EXEC_CODE (value
);
22439 if (GET_CODE (value
) == PARALLEL
)
22440 value
= XVECEXP (value
, 0, 0);
22441 value
= XEXP (value
, 0);
22442 if (GET_CODE (op
) == COND_EXEC
)
22443 op
= COND_EXEC_CODE (op
);
22444 if (GET_CODE (op
) == PARALLEL
)
22445 op
= XVECEXP (op
, 0, 0);
22448 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
22450 if (GET_CODE (XEXP (op
, 0)) == MULT
)
22451 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
22453 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
22459 /* We can't rely on the caller doing the proper promotion when
22460 using APCS or ATPCS. */
22463 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
22465 return !TARGET_AAPCS_BASED
;
22468 static enum machine_mode
22469 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED
,
22470 enum machine_mode mode
,
22471 int *punsignedp ATTRIBUTE_UNUSED
,
22472 const_tree fntype ATTRIBUTE_UNUSED
,
22473 int for_return ATTRIBUTE_UNUSED
)
22475 if (GET_MODE_CLASS (mode
) == MODE_INT
22476 && GET_MODE_SIZE (mode
) < 4)
22482 /* AAPCS based ABIs use short enums by default. */
22485 arm_default_short_enums (void)
22487 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
22491 /* AAPCS requires that anonymous bitfields affect structure alignment. */
22494 arm_align_anon_bitfield (void)
22496 return TARGET_AAPCS_BASED
;
22500 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
22503 arm_cxx_guard_type (void)
22505 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
22508 /* Return non-zero if the consumer (a multiply-accumulate instruction)
22509 has an accumulator dependency on the result of the producer (a
22510 multiplication instruction) and no other dependency on that result. */
22512 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
22514 rtx mul
= PATTERN (producer
);
22515 rtx mac
= PATTERN (consumer
);
22517 rtx mac_op0
, mac_op1
, mac_acc
;
22519 if (GET_CODE (mul
) == COND_EXEC
)
22520 mul
= COND_EXEC_CODE (mul
);
22521 if (GET_CODE (mac
) == COND_EXEC
)
22522 mac
= COND_EXEC_CODE (mac
);
22524 /* Check that mul is of the form (set (...) (mult ...))
22525 and mla is of the form (set (...) (plus (mult ...) (...))). */
22526 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
22527 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
22528 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
22531 mul_result
= XEXP (mul
, 0);
22532 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
22533 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
22534 mac_acc
= XEXP (XEXP (mac
, 1), 1);
22536 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
22537 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
22538 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
22542 /* The EABI says test the least significant bit of a guard variable. */
22545 arm_cxx_guard_mask_bit (void)
22547 return TARGET_AAPCS_BASED
;
22551 /* The EABI specifies that all array cookies are 8 bytes long. */
22554 arm_get_cookie_size (tree type
)
22558 if (!TARGET_AAPCS_BASED
)
22559 return default_cxx_get_cookie_size (type
);
22561 size
= build_int_cst (sizetype
, 8);
22566 /* The EABI says that array cookies should also contain the element size. */
22569 arm_cookie_has_size (void)
22571 return TARGET_AAPCS_BASED
;
22575 /* The EABI says constructors and destructors should return a pointer to
22576 the object constructed/destroyed. */
22579 arm_cxx_cdtor_returns_this (void)
22581 return TARGET_AAPCS_BASED
;
22584 /* The EABI says that an inline function may never be the key
22588 arm_cxx_key_method_may_be_inline (void)
22590 return !TARGET_AAPCS_BASED
;
22594 arm_cxx_determine_class_data_visibility (tree decl
)
22596 if (!TARGET_AAPCS_BASED
22597 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
22600 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
22601 is exported. However, on systems without dynamic vague linkage,
22602 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
22603 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
22604 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
22606 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
22607 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
22611 arm_cxx_class_data_always_comdat (void)
22613 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
22614 vague linkage if the class has no key function. */
22615 return !TARGET_AAPCS_BASED
;
22619 /* The EABI says __aeabi_atexit should be used to register static
22623 arm_cxx_use_aeabi_atexit (void)
22625 return TARGET_AAPCS_BASED
;
22630 arm_set_return_address (rtx source
, rtx scratch
)
22632 arm_stack_offsets
*offsets
;
22633 HOST_WIDE_INT delta
;
22635 unsigned long saved_regs
;
22637 offsets
= arm_get_frame_offsets ();
22638 saved_regs
= offsets
->saved_regs_mask
;
22640 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
22641 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
22644 if (frame_pointer_needed
)
22645 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
22648 /* LR will be the first saved register. */
22649 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
22654 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
22655 GEN_INT (delta
& ~4095)));
22660 addr
= stack_pointer_rtx
;
22662 addr
= plus_constant (addr
, delta
);
22664 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
22670 thumb_set_return_address (rtx source
, rtx scratch
)
22672 arm_stack_offsets
*offsets
;
22673 HOST_WIDE_INT delta
;
22674 HOST_WIDE_INT limit
;
22677 unsigned long mask
;
22681 offsets
= arm_get_frame_offsets ();
22682 mask
= offsets
->saved_regs_mask
;
22683 if (mask
& (1 << LR_REGNUM
))
22686 /* Find the saved regs. */
22687 if (frame_pointer_needed
)
22689 delta
= offsets
->soft_frame
- offsets
->saved_args
;
22690 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
22696 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
22699 /* Allow for the stack frame. */
22700 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
22702 /* The link register is always the first saved register. */
22705 /* Construct the address. */
22706 addr
= gen_rtx_REG (SImode
, reg
);
22709 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
22710 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
22714 addr
= plus_constant (addr
, delta
);
22716 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
22719 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
22722 /* Implements target hook vector_mode_supported_p. */
22724 arm_vector_mode_supported_p (enum machine_mode mode
)
22726 /* Neon also supports V2SImode, etc. listed in the clause below. */
22727 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
22728 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
22731 if ((TARGET_NEON
|| TARGET_IWMMXT
)
22732 && ((mode
== V2SImode
)
22733 || (mode
== V4HImode
)
22734 || (mode
== V8QImode
)))
22737 if (TARGET_INT_SIMD
&& (mode
== V4UQQmode
|| mode
== V4QQmode
22738 || mode
== V2UHQmode
|| mode
== V2HQmode
|| mode
== V2UHAmode
22739 || mode
== V2HAmode
))
22745 /* Implements target hook array_mode_supported_p. */
22748 arm_array_mode_supported_p (enum machine_mode mode
,
22749 unsigned HOST_WIDE_INT nelems
)
22752 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
))
22753 && (nelems
>= 2 && nelems
<= 4))
22759 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22760 registers when autovectorizing for Neon, at least until multiple vector
22761 widths are supported properly by the middle-end. */
22763 static enum machine_mode
22764 arm_preferred_simd_mode (enum machine_mode mode
)
22770 return TARGET_NEON_VECTORIZE_QUAD
? V4SFmode
: V2SFmode
;
22772 return TARGET_NEON_VECTORIZE_QUAD
? V4SImode
: V2SImode
;
22774 return TARGET_NEON_VECTORIZE_QUAD
? V8HImode
: V4HImode
;
22776 return TARGET_NEON_VECTORIZE_QUAD
? V16QImode
: V8QImode
;
22778 if (TARGET_NEON_VECTORIZE_QUAD
)
22785 if (TARGET_REALLY_IWMMXT
)
22801 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22803 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
22804 using r0-r4 for function arguments, r7 for the stack frame and don't have
22805 enough left over to do doubleword arithmetic. For Thumb-2 all the
22806 potentially problematic instructions accept high registers so this is not
22807 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
22808 that require many low registers. */
22810 arm_class_likely_spilled_p (reg_class_t rclass
)
22812 if ((TARGET_THUMB1
&& rclass
== LO_REGS
)
22813 || rclass
== CC_REG
)
22819 /* Implements target hook small_register_classes_for_mode_p. */
22821 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED
)
22823 return TARGET_THUMB1
;
22826 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
22827 ARM insns and therefore guarantee that the shift count is modulo 256.
22828 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
22829 guarantee no particular behavior for out-of-range counts. */
22831 static unsigned HOST_WIDE_INT
22832 arm_shift_truncation_mask (enum machine_mode mode
)
22834 return mode
== SImode
? 255 : 0;
22838 /* Map internal gcc register numbers to DWARF2 register numbers. */
22841 arm_dbx_register_number (unsigned int regno
)
22846 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
22847 compatibility. The EABI defines them as registers 96-103. */
22848 if (IS_FPA_REGNUM (regno
))
22849 return (TARGET_AAPCS_BASED
? 96 : 16) + regno
- FIRST_FPA_REGNUM
;
22851 if (IS_VFP_REGNUM (regno
))
22853 /* See comment in arm_dwarf_register_span. */
22854 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22855 return 64 + regno
- FIRST_VFP_REGNUM
;
22857 return 256 + (regno
- FIRST_VFP_REGNUM
) / 2;
22860 if (IS_IWMMXT_GR_REGNUM (regno
))
22861 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
22863 if (IS_IWMMXT_REGNUM (regno
))
22864 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
22866 gcc_unreachable ();
22869 /* Dwarf models VFPv3 registers as 32 64-bit registers.
22870 GCC models tham as 64 32-bit registers, so we need to describe this to
22871 the DWARF generation code. Other registers can use the default. */
22873 arm_dwarf_register_span (rtx rtl
)
22880 regno
= REGNO (rtl
);
22881 if (!IS_VFP_REGNUM (regno
))
22884 /* XXX FIXME: The EABI defines two VFP register ranges:
22885 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
22887 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
22888 corresponding D register. Until GDB supports this, we shall use the
22889 legacy encodings. We also use these encodings for D0-D15 for
22890 compatibility with older debuggers. */
22891 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
22894 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
22895 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
22896 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
22897 for (i
= 0; i
< nregs
; i
++)
22898 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
22903 #if ARM_UNWIND_INFO
22904 /* Emit unwind directives for a store-multiple instruction or stack pointer
22905 push during alignment.
22906 These should only ever be generated by the function prologue code, so
22907 expect them to have a particular form. */
22910 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
22913 HOST_WIDE_INT offset
;
22914 HOST_WIDE_INT nregs
;
22920 e
= XVECEXP (p
, 0, 0);
22921 if (GET_CODE (e
) != SET
)
22924 /* First insn will adjust the stack pointer. */
22925 if (GET_CODE (e
) != SET
22926 || GET_CODE (XEXP (e
, 0)) != REG
22927 || REGNO (XEXP (e
, 0)) != SP_REGNUM
22928 || GET_CODE (XEXP (e
, 1)) != PLUS
)
22931 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
22932 nregs
= XVECLEN (p
, 0) - 1;
22934 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
22937 /* The function prologue may also push pc, but not annotate it as it is
22938 never restored. We turn this into a stack pointer adjustment. */
22939 if (nregs
* 4 == offset
- 4)
22941 fprintf (asm_out_file
, "\t.pad #4\n");
22945 fprintf (asm_out_file
, "\t.save {");
22947 else if (IS_VFP_REGNUM (reg
))
22950 fprintf (asm_out_file
, "\t.vsave {");
22952 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
22954 /* FPA registers are done differently. */
22955 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
22959 /* Unknown register type. */
22962 /* If the stack increment doesn't match the size of the saved registers,
22963 something has gone horribly wrong. */
22964 if (offset
!= nregs
* reg_size
)
22969 /* The remaining insns will describe the stores. */
22970 for (i
= 1; i
<= nregs
; i
++)
22972 /* Expect (set (mem <addr>) (reg)).
22973 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
22974 e
= XVECEXP (p
, 0, i
);
22975 if (GET_CODE (e
) != SET
22976 || GET_CODE (XEXP (e
, 0)) != MEM
22977 || GET_CODE (XEXP (e
, 1)) != REG
)
22980 reg
= REGNO (XEXP (e
, 1));
22985 fprintf (asm_out_file
, ", ");
22986 /* We can't use %r for vfp because we need to use the
22987 double precision register names. */
22988 if (IS_VFP_REGNUM (reg
))
22989 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
22991 asm_fprintf (asm_out_file
, "%r", reg
);
22993 #ifdef ENABLE_CHECKING
22994 /* Check that the addresses are consecutive. */
22995 e
= XEXP (XEXP (e
, 0), 0);
22996 if (GET_CODE (e
) == PLUS
)
22998 offset
+= reg_size
;
22999 if (GET_CODE (XEXP (e
, 0)) != REG
23000 || REGNO (XEXP (e
, 0)) != SP_REGNUM
23001 || GET_CODE (XEXP (e
, 1)) != CONST_INT
23002 || offset
!= INTVAL (XEXP (e
, 1)))
23006 || GET_CODE (e
) != REG
23007 || REGNO (e
) != SP_REGNUM
)
23011 fprintf (asm_out_file
, "}\n");
23014 /* Emit unwind directives for a SET. */
23017 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
23025 switch (GET_CODE (e0
))
23028 /* Pushing a single register. */
23029 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
23030 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
23031 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
23034 asm_fprintf (asm_out_file
, "\t.save ");
23035 if (IS_VFP_REGNUM (REGNO (e1
)))
23036 asm_fprintf(asm_out_file
, "{d%d}\n",
23037 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
23039 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
23043 if (REGNO (e0
) == SP_REGNUM
)
23045 /* A stack increment. */
23046 if (GET_CODE (e1
) != PLUS
23047 || GET_CODE (XEXP (e1
, 0)) != REG
23048 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
23049 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
23052 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
23053 -INTVAL (XEXP (e1
, 1)));
23055 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
23057 HOST_WIDE_INT offset
;
23059 if (GET_CODE (e1
) == PLUS
)
23061 if (GET_CODE (XEXP (e1
, 0)) != REG
23062 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
23064 reg
= REGNO (XEXP (e1
, 0));
23065 offset
= INTVAL (XEXP (e1
, 1));
23066 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
23067 HARD_FRAME_POINTER_REGNUM
, reg
,
23070 else if (GET_CODE (e1
) == REG
)
23073 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
23074 HARD_FRAME_POINTER_REGNUM
, reg
);
23079 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
23081 /* Move from sp to reg. */
23082 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
23084 else if (GET_CODE (e1
) == PLUS
23085 && GET_CODE (XEXP (e1
, 0)) == REG
23086 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
23087 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
23089 /* Set reg to offset from sp. */
23090 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
23091 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
23103 /* Emit unwind directives for the given insn. */
23106 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
23109 bool handled_one
= false;
23111 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
23114 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
23115 && (TREE_NOTHROW (current_function_decl
)
23116 || crtl
->all_throwers_are_sibcalls
))
23119 if (NOTE_P (insn
) || !RTX_FRAME_RELATED_P (insn
))
23122 for (note
= REG_NOTES (insn
); note
; note
= XEXP (note
, 1))
23124 pat
= XEXP (note
, 0);
23125 switch (REG_NOTE_KIND (note
))
23127 case REG_FRAME_RELATED_EXPR
:
23130 case REG_CFA_REGISTER
:
23133 pat
= PATTERN (insn
);
23134 if (GET_CODE (pat
) == PARALLEL
)
23135 pat
= XVECEXP (pat
, 0, 0);
23138 /* Only emitted for IS_STACKALIGN re-alignment. */
23143 src
= SET_SRC (pat
);
23144 dest
= SET_DEST (pat
);
23146 gcc_assert (src
== stack_pointer_rtx
);
23147 reg
= REGNO (dest
);
23148 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
23151 handled_one
= true;
23154 case REG_CFA_DEF_CFA
:
23155 case REG_CFA_EXPRESSION
:
23156 case REG_CFA_ADJUST_CFA
:
23157 case REG_CFA_OFFSET
:
23158 /* ??? Only handling here what we actually emit. */
23159 gcc_unreachable ();
23167 pat
= PATTERN (insn
);
23170 switch (GET_CODE (pat
))
23173 arm_unwind_emit_set (asm_out_file
, pat
);
23177 /* Store multiple. */
23178 arm_unwind_emit_sequence (asm_out_file
, pat
);
23187 /* Output a reference from a function exception table to the type_info
23188 object X. The EABI specifies that the symbol should be relocated by
23189 an R_ARM_TARGET2 relocation. */
23192 arm_output_ttype (rtx x
)
23194 fputs ("\t.word\t", asm_out_file
);
23195 output_addr_const (asm_out_file
, x
);
23196 /* Use special relocations for symbol references. */
23197 if (GET_CODE (x
) != CONST_INT
)
23198 fputs ("(TARGET2)", asm_out_file
);
23199 fputc ('\n', asm_out_file
);
23204 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
23207 arm_asm_emit_except_personality (rtx personality
)
23209 fputs ("\t.personality\t", asm_out_file
);
23210 output_addr_const (asm_out_file
, personality
);
23211 fputc ('\n', asm_out_file
);
23214 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
23217 arm_asm_init_sections (void)
23219 exception_section
= get_unnamed_section (0, output_section_asm_op
,
23222 #endif /* ARM_UNWIND_INFO */
23224 /* Output unwind directives for the start/end of a function. */
23227 arm_output_fn_unwind (FILE * f
, bool prologue
)
23229 if (arm_except_unwind_info (&global_options
) != UI_TARGET
)
23233 fputs ("\t.fnstart\n", f
);
23236 /* If this function will never be unwound, then mark it as such.
23237 The came condition is used in arm_unwind_emit to suppress
23238 the frame annotations. */
23239 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
23240 && (TREE_NOTHROW (current_function_decl
)
23241 || crtl
->all_throwers_are_sibcalls
))
23242 fputs("\t.cantunwind\n", f
);
23244 fputs ("\t.fnend\n", f
);
23249 arm_emit_tls_decoration (FILE *fp
, rtx x
)
23251 enum tls_reloc reloc
;
23254 val
= XVECEXP (x
, 0, 0);
23255 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
23257 output_addr_const (fp
, val
);
23262 fputs ("(tlsgd)", fp
);
23265 fputs ("(tlsldm)", fp
);
23268 fputs ("(tlsldo)", fp
);
23271 fputs ("(gottpoff)", fp
);
23274 fputs ("(tpoff)", fp
);
23277 fputs ("(tlsdesc)", fp
);
23280 gcc_unreachable ();
23289 fputs (" + (. - ", fp
);
23290 output_addr_const (fp
, XVECEXP (x
, 0, 2));
23291 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
23292 fputs (reloc
== TLS_DESCSEQ
? " + " : " - ", fp
);
23293 output_addr_const (fp
, XVECEXP (x
, 0, 3));
23303 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
23306 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
23308 gcc_assert (size
== 4);
23309 fputs ("\t.word\t", file
);
23310 output_addr_const (file
, x
);
23311 fputs ("(tlsldo)", file
);
23314 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
23317 arm_output_addr_const_extra (FILE *fp
, rtx x
)
23319 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
23320 return arm_emit_tls_decoration (fp
, x
);
23321 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
23324 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
23326 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
23327 assemble_name_raw (fp
, label
);
23331 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
23333 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
23337 output_addr_const (fp
, XVECEXP (x
, 0, 0));
23341 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
23343 output_addr_const (fp
, XVECEXP (x
, 0, 0));
23347 output_addr_const (fp
, XVECEXP (x
, 0, 1));
23351 else if (GET_CODE (x
) == CONST_VECTOR
)
23352 return arm_emit_vector_const (fp
, x
);
23357 /* Output assembly for a shift instruction.
23358 SET_FLAGS determines how the instruction modifies the condition codes.
23359 0 - Do not set condition codes.
23360 1 - Set condition codes.
23361 2 - Use smallest instruction. */
23363 arm_output_shift(rtx
* operands
, int set_flags
)
23366 static const char flag_chars
[3] = {'?', '.', '!'};
23371 c
= flag_chars
[set_flags
];
23372 if (TARGET_UNIFIED_ASM
)
23374 shift
= shift_op(operands
[3], &val
);
23378 operands
[2] = GEN_INT(val
);
23379 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
23382 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
23385 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
23386 output_asm_insn (pattern
, operands
);
23390 /* Output a Thumb-1 casesi dispatch sequence. */
23392 thumb1_output_casesi (rtx
*operands
)
23394 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
23396 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
23398 switch (GET_MODE(diff_vec
))
23401 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
23402 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
23404 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
23405 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
23407 return "bl\t%___gnu_thumb1_case_si";
23409 gcc_unreachable ();
23413 /* Output a Thumb-2 casesi instruction. */
23415 thumb2_output_casesi (rtx
*operands
)
23417 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
23419 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
23421 output_asm_insn ("cmp\t%0, %1", operands
);
23422 output_asm_insn ("bhi\t%l3", operands
);
23423 switch (GET_MODE(diff_vec
))
23426 return "tbb\t[%|pc, %0]";
23428 return "tbh\t[%|pc, %0, lsl #1]";
23432 output_asm_insn ("adr\t%4, %l2", operands
);
23433 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
23434 output_asm_insn ("add\t%4, %4, %5", operands
);
23439 output_asm_insn ("adr\t%4, %l2", operands
);
23440 return "ldr\t%|pc, [%4, %0, lsl #2]";
23443 gcc_unreachable ();
23447 /* Most ARM cores are single issue, but some newer ones can dual issue.
23448 The scheduler descriptions rely on this being correct. */
/* NOTE(review): the body of this function (presumably a switch over the
   tuned core returning its issue width) is missing from this extraction;
   restore it from the original source before building.  */
23450 arm_issue_rate (void)
23468 /* A table and a function to perform ARM-specific name mangling for
23469 NEON vector types in order to conform to the AAPCS (see "Procedure
23470 Call Standard for the ARM Architecture", Appendix A). To qualify
23471 for emission with the mangled names defined in that document, a
23472 vector type must not only be of the correct mode but also be
23473 composed of NEON vector element types (e.g. __builtin_neon_qi). */
23476 enum machine_mode mode
;
23477 const char *element_type_name
;
23478 const char *aapcs_name
;
23479 } arm_mangle_map_entry
;
23481 static arm_mangle_map_entry arm_mangle_map
[] = {
23482 /* 64-bit containerized types. */
23483 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
23484 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
23485 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
23486 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
23487 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
23488 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
23489 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
23490 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
23491 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
23492 /* 128-bit containerized types. */
23493 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
23494 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
23495 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
23496 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
23497 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
23498 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
23499 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
23500 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
23501 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
23502 { VOIDmode
, NULL
, NULL
}
23506 arm_mangle_type (const_tree type
)
23508 arm_mangle_map_entry
*pos
= arm_mangle_map
;
23510 /* The ARM ABI documents (10th October 2008) say that "__va_list"
23511 has to be managled as if it is in the "std" namespace. */
23512 if (TARGET_AAPCS_BASED
23513 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
23515 static bool warned
;
23516 if (!warned
&& warn_psabi
&& !in_system_header
)
23519 inform (input_location
,
23520 "the mangling of %<va_list%> has changed in GCC 4.4");
23522 return "St9__va_list";
23525 /* Half-precision float. */
23526 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
23529 if (TREE_CODE (type
) != VECTOR_TYPE
)
23532 /* Check the mode of the vector type, and the name of the vector
23533 element type, against the table. */
23534 while (pos
->mode
!= VOIDmode
)
23536 tree elt_type
= TREE_TYPE (type
);
23538 if (pos
->mode
== TYPE_MODE (type
)
23539 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
23540 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
23541 pos
->element_type_name
))
23542 return pos
->aapcs_name
;
23547 /* Use the default mangling for unrecognized (possibly user-defined)
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};
23564 /* Adjust register allocation order when compiling for Thumb. */
23567 arm_order_regs_for_local_alloc (void)
23569 const int arm_reg_alloc_order
[] = REG_ALLOC_ORDER
;
23570 memcpy(reg_alloc_order
, arm_reg_alloc_order
, sizeof (reg_alloc_order
));
23572 memcpy (reg_alloc_order
, thumb_core_reg_alloc_order
,
23573 sizeof (thumb_core_reg_alloc_order
));
23576 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
23579 arm_frame_pointer_required (void)
23581 return (cfun
->has_nonlocal_label
23582 || SUBTARGET_FRAME_POINTER_REQUIRED
23583 || (TARGET_ARM
&& TARGET_APCS_FRAME
&& ! leaf_function_p ()));
23586 /* Only thumb1 can't support conditional execution, so return true if
23587 the target is not thumb1. */
23589 arm_have_conditional_execution (void)
23591 return !TARGET_THUMB1
;
23594 /* Legitimize a memory reference for sync primitive implemented using
23595 ldrex / strex. We currently force the form of the reference to be
23596 indirect without offset. We do not yet support the indirect offset
23597 addressing supported by some ARM targets for these
23600 arm_legitimize_sync_memory (rtx memory
)
23602 rtx addr
= force_reg (Pmode
, XEXP (memory
, 0));
23603 rtx legitimate_memory
= gen_rtx_MEM (GET_MODE (memory
), addr
);
23605 set_mem_alias_set (legitimate_memory
, ALIAS_SET_MEMORY_BARRIER
);
23606 MEM_VOLATILE_P (legitimate_memory
) = MEM_VOLATILE_P (memory
);
23607 return legitimate_memory
;
23610 /* An instruction emitter. */
23611 typedef void (* emit_f
) (int label
, const char *, rtx
*);
23613 /* An instruction emitter that emits via the conventional
23614 output_asm_insn. */
23616 arm_emit (int label ATTRIBUTE_UNUSED
, const char *pattern
, rtx
*operands
)
23618 output_asm_insn (pattern
, operands
);
23621 /* Count the number of emitted synchronization instructions. */
23622 static unsigned arm_insn_count
;
23624 /* An emitter that counts emitted instructions but does not actually
23625 emit instruction into the instruction stream. */
23627 arm_count (int label
,
23628 const char *pattern ATTRIBUTE_UNUSED
,
23629 rtx
*operands ATTRIBUTE_UNUSED
)
23635 /* Construct a pattern using conventional output formatting and feed
23636 it to output_asm_insn. Provides a mechanism to construct the
23637 output pattern on the fly. Note the hard limit on the pattern
23639 static void ATTRIBUTE_PRINTF_4
23640 arm_output_asm_insn (emit_f emit
, int label
, rtx
*operands
,
23641 const char *pattern
, ...)
23646 va_start (ap
, pattern
);
23647 vsprintf (buffer
, pattern
, ap
);
23649 emit (label
, buffer
, operands
);
23652 /* Emit the memory barrier instruction, if any, provided by this
23653 target to a specified emitter. */
23655 arm_process_output_memory_barrier (emit_f emit
, rtx
*operands
)
23657 if (TARGET_HAVE_DMB
)
23659 /* Note we issue a system level barrier. We should consider
23660 issuing a inner shareabilty zone barrier here instead, ie.
23662 emit (0, "dmb\tsy", operands
);
23666 if (TARGET_HAVE_DMB_MCR
)
23668 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands
);
23672 gcc_unreachable ();
23675 /* Emit the memory barrier instruction, if any, provided by this
23678 arm_output_memory_barrier (rtx
*operands
)
23680 arm_process_output_memory_barrier (arm_emit
, operands
);
23684 /* Helper to figure out the instruction suffix required on ldrex/strex
23685 for operations on an object of the specified mode. */
23686 static const char *
23687 arm_ldrex_suffix (enum machine_mode mode
)
23691 case QImode
: return "b";
23692 case HImode
: return "h";
23693 case SImode
: return "";
23694 case DImode
: return "d";
23696 gcc_unreachable ();
23701 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23704 arm_output_ldrex (emit_f emit
,
23705 enum machine_mode mode
,
23709 const char *suffix
= arm_ldrex_suffix (mode
);
23712 operands
[0] = target
;
23713 operands
[1] = memory
;
23714 arm_output_asm_insn (emit
, 0, operands
, "ldrex%s\t%%0, %%C1", suffix
);
23717 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23720 arm_output_strex (emit_f emit
,
23721 enum machine_mode mode
,
23727 const char *suffix
= arm_ldrex_suffix (mode
);
23730 operands
[0] = result
;
23731 operands
[1] = value
;
23732 operands
[2] = memory
;
23733 arm_output_asm_insn (emit
, 0, operands
, "strex%s%s\t%%0, %%1, %%C2", suffix
,
23737 /* Helper to emit a two operand instruction. */
23739 arm_output_op2 (emit_f emit
, const char *mnemonic
, rtx d
, rtx s
)
23745 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1", mnemonic
);
23748 /* Helper to emit a three operand instruction. */
23750 arm_output_op3 (emit_f emit
, const char *mnemonic
, rtx d
, rtx a
, rtx b
)
23757 arm_output_asm_insn (emit
, 0, operands
, "%s\t%%0, %%1, %%2", mnemonic
);
23760 /* Emit a load store exclusive synchronization loop.
23764 if old_value != required_value
23766 t1 = sync_op (old_value, new_value)
23767 [mem] = t1, t2 = [0|1]
23771 t1 == t2 is not permitted
23772 t1 == old_value is permitted
23776 RTX register or const_int representing the required old_value for
23777 the modify to continue, if NULL no comparsion is performed. */
23779 arm_output_sync_loop (emit_f emit
,
23780 enum machine_mode mode
,
23783 rtx required_value
,
23787 enum attr_sync_op sync_op
,
23788 int early_barrier_required
)
23792 gcc_assert (t1
!= t2
);
23794 if (early_barrier_required
)
23795 arm_process_output_memory_barrier (emit
, NULL
);
23797 arm_output_asm_insn (emit
, 1, operands
, "%sLSYT%%=:", LOCAL_LABEL_PREFIX
);
23799 arm_output_ldrex (emit
, mode
, old_value
, memory
);
23801 if (required_value
)
23805 operands
[0] = old_value
;
23806 operands
[1] = required_value
;
23807 arm_output_asm_insn (emit
, 0, operands
, "cmp\t%%0, %%1");
23808 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX
);
23814 arm_output_op3 (emit
, "add", t1
, old_value
, new_value
);
23818 arm_output_op3 (emit
, "sub", t1
, old_value
, new_value
);
23822 arm_output_op3 (emit
, "orr", t1
, old_value
, new_value
);
23826 arm_output_op3 (emit
, "eor", t1
, old_value
, new_value
);
23830 arm_output_op3 (emit
,"and", t1
, old_value
, new_value
);
23834 arm_output_op3 (emit
, "and", t1
, old_value
, new_value
);
23835 arm_output_op2 (emit
, "mvn", t1
, t1
);
23845 arm_output_strex (emit
, mode
, "", t2
, t1
, memory
);
23847 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
23848 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=",
23849 LOCAL_LABEL_PREFIX
);
23853 /* Use old_value for the return value because for some operations
23854 the old_value can easily be restored. This saves one register. */
23855 arm_output_strex (emit
, mode
, "", old_value
, t1
, memory
);
23856 operands
[0] = old_value
;
23857 arm_output_asm_insn (emit
, 0, operands
, "teq\t%%0, #0");
23858 arm_output_asm_insn (emit
, 0, operands
, "bne\t%sLSYT%%=",
23859 LOCAL_LABEL_PREFIX
);
23864 arm_output_op3 (emit
, "sub", old_value
, t1
, new_value
);
23868 arm_output_op3 (emit
, "add", old_value
, t1
, new_value
);
23872 arm_output_op3 (emit
, "eor", old_value
, t1
, new_value
);
23876 arm_output_op2 (emit
, "mov", old_value
, required_value
);
23880 gcc_unreachable ();
23884 arm_process_output_memory_barrier (emit
, NULL
);
23885 arm_output_asm_insn (emit
, 1, operands
, "%sLSYB%%=:", LOCAL_LABEL_PREFIX
);
23889 arm_get_sync_operand (rtx
*operands
, int index
, rtx default_value
)
23892 default_value
= operands
[index
- 1];
23894 return default_value
;
23897 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23898 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23900 /* Extract the operands for a synchroniztion instruction from the
23901 instructions attributes and emit the instruction. */
23903 arm_process_output_sync_insn (emit_f emit
, rtx insn
, rtx
*operands
)
23905 rtx result
, memory
, required_value
, new_value
, t1
, t2
;
23907 enum machine_mode mode
;
23908 enum attr_sync_op sync_op
;
23910 result
= FETCH_SYNC_OPERAND(result
, 0);
23911 memory
= FETCH_SYNC_OPERAND(memory
, 0);
23912 required_value
= FETCH_SYNC_OPERAND(required_value
, 0);
23913 new_value
= FETCH_SYNC_OPERAND(new_value
, 0);
23914 t1
= FETCH_SYNC_OPERAND(t1
, 0);
23915 t2
= FETCH_SYNC_OPERAND(t2
, 0);
23917 get_attr_sync_release_barrier (insn
) == SYNC_RELEASE_BARRIER_YES
;
23918 sync_op
= get_attr_sync_op (insn
);
23919 mode
= GET_MODE (memory
);
23921 arm_output_sync_loop (emit
, mode
, result
, memory
, required_value
,
23922 new_value
, t1
, t2
, sync_op
, early_barrier
);
23925 /* Emit a synchronization instruction loop. */
23927 arm_output_sync_insn (rtx insn
, rtx
*operands
)
23929 arm_process_output_sync_insn (arm_emit
, insn
, operands
);
23933 /* Count the number of machine instruction that will be emitted for a
23934 synchronization instruction. Note that the emitter used does not
23935 emit instructions, it just counts instructions being carefull not
23936 to count labels. */
23938 arm_sync_loop_insns (rtx insn
, rtx
*operands
)
23940 arm_insn_count
= 0;
23941 arm_process_output_sync_insn (arm_count
, insn
, operands
);
23942 return arm_insn_count
;
23945 /* Helper to call a target sync instruction generator, dealing with
23946 the variation in operands required by the different generators. */
23948 arm_call_generator (struct arm_sync_generator
*generator
, rtx old_value
,
23949 rtx memory
, rtx required_value
, rtx new_value
)
23951 switch (generator
->op
)
23953 case arm_sync_generator_omn
:
23954 gcc_assert (! required_value
);
23955 return generator
->u
.omn (old_value
, memory
, new_value
);
23957 case arm_sync_generator_omrn
:
23958 gcc_assert (required_value
);
23959 return generator
->u
.omrn (old_value
, memory
, required_value
, new_value
);
23965 /* Expand a synchronization loop. The synchronization loop is expanded
23966 as an opaque block of instructions in order to ensure that we do
23967 not subsequently get extraneous memory accesses inserted within the
23968 critical region. The exclusive access property of ldrex/strex is
23969 only guaranteed in there are no intervening memory accesses. */
23971 arm_expand_sync (enum machine_mode mode
,
23972 struct arm_sync_generator
*generator
,
23973 rtx target
, rtx memory
, rtx required_value
, rtx new_value
)
23975 if (target
== NULL
)
23976 target
= gen_reg_rtx (mode
);
23978 memory
= arm_legitimize_sync_memory (memory
);
23979 if (mode
!= SImode
)
23981 rtx load_temp
= gen_reg_rtx (SImode
);
23983 if (required_value
)
23984 required_value
= convert_modes (SImode
, mode
, required_value
, true);
23986 new_value
= convert_modes (SImode
, mode
, new_value
, true);
23987 emit_insn (arm_call_generator (generator
, load_temp
, memory
,
23988 required_value
, new_value
));
23989 emit_move_insn (target
, gen_lowpart (mode
, load_temp
));
23993 emit_insn (arm_call_generator (generator
, target
, memory
, required_value
,
23998 static unsigned int
23999 arm_autovectorize_vector_sizes (void)
24001 return TARGET_NEON_VECTORIZE_QUAD
? 16 | 8 : 0;
24005 arm_vector_alignment_reachable (const_tree type
, bool is_packed
)
24007 /* Vectors which aren't in packed structures will not be less aligned than
24008 the natural alignment of their element type, so this is safe. */
24009 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
24012 return default_builtin_vector_alignment_reachable (type
, is_packed
);
24016 arm_builtin_support_vector_misalignment (enum machine_mode mode
,
24017 const_tree type
, int misalignment
,
24020 if (TARGET_NEON
&& !BYTES_BIG_ENDIAN
)
24022 HOST_WIDE_INT align
= TYPE_ALIGN_UNIT (type
);
24027 /* If the misalignment is unknown, we should be able to handle the access
24028 so long as it is not to a member of a packed data structure. */
24029 if (misalignment
== -1)
24032 /* Return true if the misalignment is a multiple of the natural alignment
24033 of the vector's element type. This is probably always going to be
24034 true in practice, since we've already established that this isn't a
24036 return ((misalignment
% align
) == 0);
24039 return default_builtin_support_vector_misalignment (mode
, type
, misalignment
,
24044 arm_conditional_register_usage (void)
24048 if (TARGET_SOFT_FLOAT
|| TARGET_THUMB1
|| !TARGET_FPA
)
24050 for (regno
= FIRST_FPA_REGNUM
;
24051 regno
<= LAST_FPA_REGNUM
; ++regno
)
24052 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
24055 if (TARGET_THUMB1
&& optimize_size
)
24057 /* When optimizing for size on Thumb-1, it's better not
24058 to use the HI regs, because of the overhead of
24060 for (regno
= FIRST_HI_REGNUM
;
24061 regno
<= LAST_HI_REGNUM
; ++regno
)
24062 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
24065 /* The link register can be clobbered by any branch insn,
24066 but we have no way to track that at present, so mark
24067 it as unavailable. */
24069 fixed_regs
[LR_REGNUM
] = call_used_regs
[LR_REGNUM
] = 1;
24071 if (TARGET_32BIT
&& TARGET_HARD_FLOAT
)
24073 if (TARGET_MAVERICK
)
24075 for (regno
= FIRST_FPA_REGNUM
;
24076 regno
<= LAST_FPA_REGNUM
; ++ regno
)
24077 fixed_regs
[regno
] = call_used_regs
[regno
] = 1;
24078 for (regno
= FIRST_CIRRUS_FP_REGNUM
;
24079 regno
<= LAST_CIRRUS_FP_REGNUM
; ++ regno
)
24081 fixed_regs
[regno
] = 0;
24082 call_used_regs
[regno
] = regno
< FIRST_CIRRUS_FP_REGNUM
+ 4;
24087 /* VFPv3 registers are disabled when earlier VFP
24088 versions are selected due to the definition of
24089 LAST_VFP_REGNUM. */
24090 for (regno
= FIRST_VFP_REGNUM
;
24091 regno
<= LAST_VFP_REGNUM
; ++ regno
)
24093 fixed_regs
[regno
] = 0;
24094 call_used_regs
[regno
] = regno
< FIRST_VFP_REGNUM
+ 16
24095 || regno
>= FIRST_VFP_REGNUM
+ 32;
24100 if (TARGET_REALLY_IWMMXT
)
24102 regno
= FIRST_IWMMXT_GR_REGNUM
;
24103 /* The 2002/10/09 revision of the XScale ABI has wCG0
24104 and wCG1 as call-preserved registers. The 2002/11/21
24105 revision changed this so that all wCG registers are
24106 scratch registers. */
24107 for (regno
= FIRST_IWMMXT_GR_REGNUM
;
24108 regno
<= LAST_IWMMXT_GR_REGNUM
; ++ regno
)
24109 fixed_regs
[regno
] = 0;
24110 /* The XScale ABI has wR0 - wR9 as scratch registers,
24111 the rest as call-preserved registers. */
24112 for (regno
= FIRST_IWMMXT_REGNUM
;
24113 regno
<= LAST_IWMMXT_REGNUM
; ++ regno
)
24115 fixed_regs
[regno
] = 0;
24116 call_used_regs
[regno
] = regno
< FIRST_IWMMXT_REGNUM
+ 10;
24120 if ((unsigned) PIC_OFFSET_TABLE_REGNUM
!= INVALID_REGNUM
)
24122 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
24123 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
24125 else if (TARGET_APCS_STACK
)
24127 fixed_regs
[10] = 1;
24128 call_used_regs
[10] = 1;
24130 /* -mcaller-super-interworking reserves r11 for calls to
24131 _interwork_r11_call_via_rN(). Making the register global
24132 is an easy way of ensuring that it remains valid for all
24134 if (TARGET_APCS_FRAME
|| TARGET_CALLER_INTERWORKING
24135 || TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
)
24137 fixed_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
24138 call_used_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
24139 if (TARGET_CALLER_INTERWORKING
)
24140 global_regs
[ARM_HARD_FRAME_POINTER_REGNUM
] = 1;
24142 SUBTARGET_CONDITIONAL_REGISTER_USAGE
24146 arm_preferred_rename_class (reg_class_t rclass
)
24148 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
24149 using GENERIC_REGS. During register rename pass, we prefer LO_REGS,
24150 and code size can be reduced. */
24151 if (TARGET_THUMB2
&& rclass
== GENERAL_REGS
)
24157 /* Compute the atrribute "length" of insn "*push_multi".
24158 So this function MUST be kept in sync with that insn pattern. */
24160 arm_attr_length_push_multi(rtx parallel_op
, rtx first_op
)
24162 int i
, regno
, hi_reg
;
24163 int num_saves
= XVECLEN (parallel_op
, 0);
24173 regno
= REGNO (first_op
);
24174 hi_reg
= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
24175 for (i
= 1; i
< num_saves
&& !hi_reg
; i
++)
24177 regno
= REGNO (XEXP (XVECEXP (parallel_op
, 0, i
), 0));
24178 hi_reg
|= (REGNO_REG_CLASS (regno
) == HI_REGS
) && (regno
!= LR_REGNUM
);
24186 #include "gt-arm.h"