/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "c-family/c-pragma.h"	/* ??? */
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *,
                               enum machine_mode, rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
                                                    enum machine_mode, int *,
                                                    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);
static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
                             const_tree, bool);
static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
                                      const_tree, bool);
static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
                                      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
static void arm_asm_init_sections (void);
#endif
static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);
static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
                                                     const_tree type,
                                                     int misalignment,
                                                     bool is_packed);
static void arm_conditional_register_usage (void);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
/* Set default optimization options.  */
static const struct default_options arm_option_optimization_table[] =
  {
    /* Enable section anchors by default at -O1 or higher.  */
    { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
    { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
    { OPT_LEVELS_NONE, 0, NULL, 0 }
  };
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef  TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef  TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef  TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help
#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override
#undef  TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef  TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef  TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef  TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef  TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef  TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef  TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
#if ARM_UNWIND_INFO
#undef  TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef  TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#undef  TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
#endif /* ARM_UNWIND_INFO */

#undef  TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef  TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef  TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
#undef  TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef  TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef  TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef  TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef  TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef  TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef  TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef  TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef  TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef  TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef  TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef  TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef  TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef  TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef  TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef  TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef  TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
                                         architecture.  */
#define FL_ARCH7      (1 << 22)       /* Architecture 7.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Flags that only affect tuning, not available instructions.  */
#define FL_TUNE	      (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
		       | FL_CO_PROC)

#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if generating Thumb-1 instructions.  */
int thumb1_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2 = 0;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv = 0;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
   we must report the mode of the memory reference from
   TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
#define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
#define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
  prefetch_slots, \
  l1_size, \
  l1_line_size

const struct tune_params arm_slowmul_tune =
{
  arm_slowmul_rtx_costs,
  NULL,
  3,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_fastmul_tune =
{
  arm_fastmul_rtx_costs,
  NULL,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_xscale_tune =
{
  arm_xscale_rtx_costs,
  xscale_sched_adjust_cost,
  2,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_9e_tune =
{
  arm_9e_rtx_costs,
  NULL,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};

const struct tune_params arm_cortex_a9_tune =
{
  arm_9e_rtx_costs,
  cortex_a9_sched_adjust_cost,
  1,
  ARM_PREFETCH_BENEFICIAL(4,32,32)
};

const struct tune_params arm_fa726te_tune =
{
  arm_9e_rtx_costs,
  fa726te_sched_adjust_cost,
  1,
  ARM_PREFETCH_NOT_BENEFICIAL
};
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm4,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0 , NULL}
};

/* These are populated as commandline arguments are processed, or NULL
   if not specified.  */
static const struct processors *arm_selected_arch;
static const struct processors *arm_selected_cpu;
static const struct processors *arm_selected_tune;
/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */

static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",            ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",           ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",           ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",       ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",            ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",          ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",     ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16", ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",        ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",   ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true , false},
  {"neon-fp16",      ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true , true },
  {"vfpv4",          ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",      ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",    ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",     ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",           ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};
struct float_abi
{
  const char *name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",	ARM_FLOAT_ABI_SOFT},
  {"softfp",	ARM_FLOAT_ABI_SOFTFP},
  {"hard",	ARM_FLOAT_ABI_HARD}
};
struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};

/* Available values for -mfp16-format=.  */

static const struct fp16_format all_fp16_formats[] =
{
  {"none",		ARM_FP16_FORMAT_NONE},
  {"ieee",		ARM_FP16_FORMAT_IEEE},
  {"alternative",	ARM_FP16_FORMAT_ALTERNATIVE}
};
struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",		ARM_ABI_APCS},
  {"atpcs",		ARM_ABI_ATPCS},
  {"aapcs",		ARM_ABI_AAPCS},
  {"iwmmxt",		ARM_ABI_IWMMXT},
  {"aapcs-linux",	ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */
  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      /* Arithmetic.  */
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      /* Comparisons.  */
      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }
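  /* With the NULL HFmode libfuncs registered above, an expression such
     as "__fp16 c = a + b" is expanded by promoting a and b to SFmode
     (via __gnu_h2f_ieee or __gnu_h2f_alternative), adding in SFmode,
     and narrowing the result back through the matching f2h routine.  */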
  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Lookup NAME in SEL.  */
static const struct processors *
arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
{
  if (!(name && *name))
    return NULL;

  for (; sel->name != NULL; sel++)
    {
      if (streq (name, sel->name))
	return sel;
    }

  error ("bad value (%s) for %s switch", name, desc);

  return NULL;
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
      return true;

    case OPT_mcpu_:
      arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  int remaining;
  static int columns = 0;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      p = getenv ("COLUMNS");
      if (p != NULL)
	{
	  int value = atoi (p);

	  if (value > 0)
	    columns = value;
	}

      if (columns == 0)
	/* Use a reasonable default.  */
	columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_cores[i].name);
	  remaining = columns - (len + 4);
	}
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (--i >= 0)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	{
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;
	}
      else
	{
	  if (remaining > 0)
	    printf (",");
	  printf ("\n    %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
	}
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  unsigned i;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (arm_selected_arch)
    {
      if (arm_selected_cpu)
	{
	  /* Check for conflict between mcpu and march.  */
	  if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       arm_selected_cpu->name, arm_selected_arch->name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = arm_selected_arch;
	    }
	  else
	    /* -mcpu wins.  */
	    arm_selected_arch = NULL;
	}
      else
	/* Pick a CPU based on the architecture.  */
	arm_selected_cpu = arm_selected_arch;
    }
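  /* For example, "-mcpu=cortex-a8 -march=armv5te" differs in more than
     the FL_TUNE bits, so the warning above fires; armv5te then drives
     code generation while cortex-a8 remains the default tuning
     target.  */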
  /* If the user did not specify a processor, choose one for them.  */
  if (!arm_selected_cpu)
    {
      const struct processors * sel;
      unsigned int sought;

      arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      if (!arm_selected_cpu->name)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
#endif
	  /* Default to ARM6.  */
	  if (!arm_selected_cpu->name)
	    arm_selected_cpu = &all_cores[arm6];
	}

      sel = arm_selected_cpu;
      insn_flags = sel->flags;
      /* Now check to see if the user has specified some command line
	 switch that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  arm_selected_cpu = sel;
	}
    }
);
1571 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1572 if (!arm_selected_tune
)
1573 arm_selected_tune
= &all_cores
[arm_selected_cpu
->core
];
1575 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", arm_selected_cpu
->arch
);
1576 insn_flags
= arm_selected_cpu
->flags
;
1578 arm_tune
= arm_selected_tune
->core
;
1579 tune_flags
= arm_selected_tune
->flags
;
1580 current_tune
= arm_selected_tune
->tune
;
  if (target_fp16_format_name)
    {
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	{
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	    {
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);
    }
  else
    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7 = (insn_flags & FL_ARCH7) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = TARGET_ARM == 0;
  thumb1_code = TARGET_THUMB1 != 0;
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;

  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
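
  /* Editor's note: an illustrative sketch, not part of the original
     source, restating the Thumb-1 ranges above.  A 16-bit
     "ldr r0, [r1, #N]" encodes a 5-bit immediate scaled by the access
     size, so the byte-address ranges are 0-31 (byte), 0-62 (halfword)
     and 0-124 (word); the 7-bit anchor range [0, 127] chosen above
     covers all three.  The helper name is hypothetical.  */
#if 0
static int
thumb1_ldr_offset_in_range (int offset, int access_size)
{
  /* ACCESS_SIZE is 1, 2 or 4 bytes; the offset must be a multiple of
     the access size and fit in 5 bits after scaling.  */
  return (offset >= 0
          && offset % access_size == 0
          && offset / access_size <= 31);
}
#endif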

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }

  if (target_fpu_name == NULL)
    {
#ifdef FPUTYPE_DEFAULT
      target_fpu_name = FPUTYPE_DEFAULT;
#else
      if (arm_arch_cirrus)
        target_fpu_name = "maverick";
      else
        target_fpu_name = "fpe2";
#endif
    }

  arm_fpu_desc = NULL;
  for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
    {
      if (streq (all_fpus[i].name, target_fpu_name))
        {
          arm_fpu_desc = &all_fpus[i];
          break;
        }
    }

  if (!arm_fpu_desc)
    {
      error ("invalid floating point option: -mfpu=%s", target_fpu_name);
      return;
    }

  switch (arm_fpu_desc->model)
    {
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
        arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
        arm_fpu_attr = FPU_FPE3;
      else
        arm_fpu_attr = FPU_FPA;
      break;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;
      break;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
      break;

    default:
      gcc_unreachable ();
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;

  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || (TARGET_FPA && arm_fpu_desc->rev))
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
        target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
        target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
        target_thread_pointer = TP_CP15;
      else
        error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
          || (ARM_DOUBLEWORD_ALIGN && size == 64))
        arm_structure_size_boundary = size;
      else
        warning (0, "structure size boundary can only be set to %s",
                 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64" : "8 or 32");
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (arm_selected_cpu->core == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  /* Hoisting PIC address calculations more aggressively provides a small,
     but measurable, size reduction for PIC code.  Therefore, we decrease
     the bar for unrestricted expression hoisting to the cost of PIC address
     calculation, which is 2 instructions.  */
  maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
                         global_options.x_param_values,
                         global_options_set.x_param_values);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
     have deemed it beneficial (signified by setting
     num_prefetch_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->num_prefetch_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->num_prefetch_slots > 0)
    maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
                           current_tune->num_prefetch_slots,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_line_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
                           current_tune->l1_cache_line_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);
  if (current_tune->l1_cache_size >= 0)
    maybe_set_param_value (PARAM_L1_CACHE_SIZE,
                           current_tune->l1_cache_size,
                           global_options.x_param_values,
                           global_options_set.x_param_values);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
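
/* Editor's note: example of the attribute strings the table above
   serves (illustrative user code, not part of GCC):

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     void und_handler (void) __attribute__ ((interrupt ("UNDEF")));

   arm_isr_value maps "IRQ" to ARM_FT_ISR and "UNDEF" to
   ARM_FT_EXCEPTION; an unknown string yields ARM_FT_UNKNOWN.  */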

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
                   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
         Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
                   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}

/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
}

/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
                                NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
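
/* Editor's note (illustrative, not from the original): a trampoline
   placed at address 0x8000 is materialized here as 0x8001, since the
   low bit of the target address selects Thumb state on interworking
   branches such as "bx".  */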

/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ...  */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
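
/* Editor's note (illustrative, not from the original): when
   use_return_insn returns 1 the whole epilogue can be a single
   instruction, e.g. "ldmfd sp!, {r4, r5, pc}" when r4, r5 and lr were
   saved, or "bx lr" when nothing was stacked; otherwise a separate
   restore sequence followed by a return is required.  */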

/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
          && ((i & ~0xc000003f) == 0
              || (i & ~0xf000000f) == 0
              || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
        return TRUE;
    }

  return FALSE;
}
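
/* Editor's note: an illustrative, self-contained restatement of the
   ARM-mode rule checked above (a sketch, not part of GCC): a 32-bit
   value is a valid immediate if it equals an 8-bit value rotated right
   by an even amount.  For example, 0x0003fc00 is 0xff rotated right by
   22.  The function name is hypothetical.  */
#if 0
static int
example_arm_immediate_p (unsigned int v)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating V left by ROT undoes a right-rotation of an 8-bit
         field; if the result fits in 8 bits, V is encodable.  */
      unsigned int undone = (v << rot) | (rot ? v >> (32 - rot) : 0);

      if ((undone & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
#endif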

/* Return true if I is a valid constant for the operation CODE.  */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
        return 1;
      else
        return 0;

    case PLUS:
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
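
/* Editor's note: worked example of the fallbacks above (illustrative,
   not from the original).  0xffffff00 is not a valid ARM immediate,
   but for PLUS its negation 0x100 is, so "add r0, r1, #0xffffff00"
   can be emitted as "sub r0, r1, #0x100"; for AND the inverted value
   0xff is valid, giving "bic r0, r1, #0xff".  */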

/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref:  gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are diadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.  */
              if (code == MINUS)
                emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
              else
                emit_set_insn (target,
                               gen_rtx_fmt_ee (code, mode, source, temp));
              return 2;
            }
        }
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
                           1);
}
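
/* Editor's note: illustrative example (not from the original) of the
   TARGET_USE_MOVT path above.  On ARMv6T2 and later, "r0 = 0x12345678"
   splits into the halfword pair

     movw r0, #0x5678   @ low 16 bits
     movt r0, #0x1234   @ high 16 bits

   which arm_emit_movpair generates, instead of a literal-pool load.  */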

/* Return the number of instructions required to synthesize the given
   constant, if we start emitting them from bit-position I.  */
static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
  HOST_WIDE_INT temp1;
  int step_size = TARGET_ARM ? 2 : 1;
  int num_insns = 0;

  gcc_assert (TARGET_ARM || i == 0);

  do
    {
      int end;

      if (i <= 0)
        i += 32;
      if (remainder & (((1 << step_size) - 1) << (i - step_size)))
        {
          end = i - 8;
          if (end < 0)
            end += 32;
          temp1 = remainder & ((0x0ff << end)
                               | ((i < end) ? (0xff >> (32 - end)) : 0));
          remainder &= ~temp1;
          num_insns++;
          i -= 8 - step_size;
        }
      i -= step_size;
    } while (remainder);
  return num_insns;
}

static int
find_best_start (unsigned HOST_WIDE_INT remainder)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom.  */
  if (! TARGET_ARM)
    return 0;

  for (i = 0; i < 32; i += 2)
    {
      int consecutive_zeros = 0;

      if (!(remainder & (3 << i)))
        {
          while ((i < 32) && !(remainder & (3 << i)))
            {
              consecutive_zeros += 2;
              i += 2;
            }
          if (consecutive_zeros > best_consecutive_zeros)
            {
              best_consecutive_zeros = consecutive_zeros;
              best_start = i - consecutive_zeros;
            }
          i -= 2;
        }
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
      && (count_insns_for_constant (remainder, 0) <=
          count_insns_for_constant (remainder, best_start)))
    best_start = 0;

  return best_start;
}

/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}

/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */
/* ??? This needs more work for thumb2.  */

static int
arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
                  HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
                  int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int can_negate_initial = 0;
  int i;
  int num_bits_set = 0;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  int step_size = TARGET_ARM ? 2 : 1;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      can_negate = 1;
      can_negate_initial = 1;
      break;

    case PLUS:
      can_negate = 1;
      can_negate_initial = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      break;

    case AND:
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, const0_rtx));
          return 1;
        }
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target, source));
          return 1;
        }

      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (VOIDmode, target,
                                             gen_rtx_MINUS (mode, GEN_INT (val),
                                                            source)));
          return 1;
        }
      can_negate = 1;

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_arm (val)
      || (can_negate_initial && const_ok_for_arm (-val))
      || (can_invert && const_ok_for_arm (~val)))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (VOIDmode, target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
        clear_sign_bit_copies++;
      else
        break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
        set_sign_bit_copies++;
      else
        break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
        clear_zero_bit_copies++;
      else
        break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
        set_zero_bit_copies++;
      else
        break;
    }

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
        {
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
                                                   GEN_INT (val)));
          return 1;
        }

      /* See if we can do this by sign_extending a constant that is known
         to be negative.  This is a good way of doing it, since the shift
         may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
        {
          if (const_ok_for_arm
              (temp1 = ARM_SIGN_EXTEND (remainder
                                        << (set_sign_bit_copies - 1))))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
          /* For an inverted constant, we will need to set the low bits,
             these will be shifted out of harm's way.  */
          temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
          if (const_ok_for_arm (~temp1))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          int topshift = clear_sign_bit_copies & ~1;

          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, new_src,
                                                   GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (VOIDmode, target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (VOIDmode, target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, sub,
                                                   GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (VOIDmode, target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      if (code == XOR)
        break;

      /*  Convert.
          x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
                             and the remainder 0s for e.g. 0xfff00000)
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

          This can be done in 2 instructions by using shifts with mov or mvn.
          e.g. for
          x = x | 0xfff00000;
          we generate.
          mvn   r0, r0, asl #12
          mvn   r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (-1 << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_sign_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode,
                                                           source,
                                                           shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode, sub,
                                                             shift))));
            }
          return 2;
        }

      /* Convert
          x = y | constant (which has set_zero_bit_copies number of trailing ones).
           to
          x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

          For eg. r0 = r0 | 0xfff
               mvn      r0, r0, lsr #12
               mvn      r0, r0, asl #12

      */
      if (set_zero_bit_copies > 8
          && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (set_zero_bit_copies);

              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, sub,
                              gen_rtx_NOT (mode,
                                           gen_rtx_LSHIFTRT (mode,
                                                             source,
                                                             shift))));
              emit_constant_insn
                (cond,
                 gen_rtx_SET (VOIDmode, target,
                              gen_rtx_NOT (mode,
                                           gen_rtx_ASHIFT (mode, sub,
                                                           shift))));
            }
          return 2;
        }

      /* This will never be reached for Thumb2 because orn is a valid
         instruction. This is for Thumb1 and the ARM 32 bit cases.

         x = y | constant (such that ~constant is a valid constant)
         Transform this to
         x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
        {
          if (generate)
            {
              rtx sub = subtargets ? gen_reg_rtx (mode) : target;
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_NOT (mode, source)));
              source = sub;
              if (subtargets)
                sub = gen_reg_rtx (mode);
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, sub,
                                               gen_rtx_AND (mode, source,
                                                            GEN_INT (temp1))));
              emit_constant_insn (cond,
                                  gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, sub)));
            }
          return 3;
        }
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = ((0xffffffff
                                       << (32 - clear_sign_bit_copies))
                                      & 0xffffffff);

          if ((remainder | shift_mask) != 0xffffffff)
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond,
                                            remainder | shift_mask,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      break;

    default:
      break;
    }

  for (i = 0; i < 32; i++)
    if (remainder & (1 << i))
      num_bits_set++;

  if ((code == AND)
      || (code != IOR && can_invert && num_bits_set > 16))
    remainder ^= 0xffffffff;
  else if (code == PLUS && num_bits_set > 16)
    remainder = (-remainder) & 0xffffffff;

  /* For XOR, if more than half the bits are set and there's a sequence
     of more than 8 consecutive ones in the pattern then we can XOR by the
     inverted constant and then invert the final result; this may save an
     instruction and might also lead to the final mvn being merged with
     some other operation.  */
  else if (code == XOR && num_bits_set > 16
           && (count_insns_for_constant (remainder ^ 0xffffffff,
                                         find_best_start
                                         (remainder ^ 0xffffffff))
               < count_insns_for_constant (remainder,
                                           find_best_start (remainder))))
    {
      remainder ^= 0xffffffff;
      final_invert = 1;
    }
  else
    {
      can_invert = 0;
      can_negate = 0;
    }

  /* Now try and find a way of doing the job in either two or three
     instructions.
     We start by looking for the largest block of zeros that are aligned on
     a 2-bit boundary, we then fill up the temps, wrapping around to the
     top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.
     Thumb-2 constants are shifted, not rotated, so the MSB is always the
     best place to start.  */

  /* ??? Use thumb2 replicated constants when the high and low halfwords are
     the same.  */
  {
    /* Now start emitting the insns.  */
    i = find_best_start (remainder);
    do
      {
        int end;

        if (i <= 0)
          i += 32;
        if (remainder & (3 << (i - 2)))
          {
            end = i - 8;
            if (end < 0)
              end += 32;
            temp1 = remainder & ((0x0ff << end)
                                 | ((i < end) ? (0xff >> (32 - end)) : 0));
            remainder &= ~temp1;

            if (generate)
              {
                rtx new_src, temp1_rtx;

                if (code == SET || code == MINUS)
                  {
                    new_src = (subtargets ? gen_reg_rtx (mode) : target);
                    if (can_invert && code != MINUS)
                      temp1 = ~temp1;
                  }
                else
                  {
                    if ((final_invert || remainder) && subtargets)
                      new_src = gen_reg_rtx (mode);
                    else
                      new_src = target;
                    if (can_invert)
                      temp1 = ~temp1;
                    else if (can_negate)
                      temp1 = -temp1;
                  }

                temp1 = trunc_int_for_mode (temp1, mode);
                temp1_rtx = GEN_INT (temp1);

                if (code == SET)
                  ;
                else if (code == MINUS)
                  temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
                else
                  temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

                emit_constant_insn (cond,
                                    gen_rtx_SET (VOIDmode, new_src,
                                                 temp1_rtx));
                source = new_src;
              }

            if (code == SET)
              {
                can_invert = 0;
                code = PLUS;
              }
            else if (code == MINUS)
              code = PLUS;

            insns++;
            i -= 8 - step_size;
          }
        /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
           shifts.  */
        i -= step_size;
      }
    while (remainder);
  }

  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}

/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
{
  enum machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons.  In ARM mode
     we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be either
     reversed or (for constant OP1) adjusted to GE/LT.  Similarly
     for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {
      rtx tem;

      /* To keep things simple, always use the Cirrus cfcmp64 if it is
         available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
        return code;

      if (code == GT || code == LE
          || (!TARGET_ARM && (code == GTU || code == LEU)))
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (GET_CODE (*op1) == CONST_INT)
            {
              i = INTVAL (*op1);
              switch (code)
                {
                case GT:
                case LE:
                  if (i != maxval
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      return code == GT ? GE : LT;
                    }
                  break;
                case GTU:
                case LEU:
                  if (i != ~((unsigned HOST_WIDE_INT) 0)
                      && arm_const_double_by_immediates (GEN_INT (i + 1)))
                    {
                      *op1 = GEN_INT (i + 1);
                      return code == GTU ? GEU : LTU;
                    }
                  break;
                default:
                  gcc_unreachable ();
                }
            }

          /* If that did not work, reverse the condition.  */
          tem = *op0;
          *op0 = *op1;
          *op1 = tem;
          return swap_condition (code);
        }

      return code;
    }

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (GET_CODE (*op1) != CONST_INT
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return code;

  i = INTVAL (*op1);

  switch (code)
    {
    case EQ:
    case NE:
      return code;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          return code == GT ? GE : LT;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          return code == GE ? GT : LE;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (i + 1);
          return code == GTU ? GEU : LTU;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          return code == GEU ? GTU : LEU;
        }
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
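
/* Editor's note: worked example (not from the original) of the
   adjustment above.  "x > 0xfffff" uses a constant no single ARM
   instruction can encode, but 0x100000 is encodable, so the comparison
   is rewritten as "x >= 0x100000" (GT becomes GE with op1 + 1).  */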

/* Define how to find the value returned by a function.  */

static rtx
arm_function_value (const_tree type, const_tree func,
                    bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return LIBCALL_VALUE (mode);
}

static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}

static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}

static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}

static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static htab_t libcall_htab;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = htab_create (31, libcall_hash, libcall_eq,
                                  NULL);
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (ufix_optab, DImode, SFmode));
    }

  return libcall && htab_find (libcall_htab, libcall) != NULL;
}

rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
         even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
        return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return LIBCALL_VALUE (mode);
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
        {
          if (TARGET_VFP)
            size += 32;
          if (TARGET_FPA)
            size += 12;
          if (TARGET_MAVERICK)
            size += 8;
        }
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}

/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}
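
/* Editor's note: worked example (not from the original) for the APCS
   rules above.  "struct { int a; }" is integer-like and is returned in
   r0, while "struct { int a, b; }" exceeds one word and is returned in
   memory; under AAPCS the second struct also goes to memory, being an
   aggregate larger than a word with no co-processor rule applying.  */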

/* Indicate whether or not words of a double are in big-endian order.  */

int
arm_float_words_big_endian (void)
{
  if (TARGET_MAVERICK)
    return 0;

  /* For FPA, float words are always big-endian.  For VFP, float words
     follow the memory system mode.  */

  if (TARGET_FPA)
    {
      return 1;
    }

  if (TARGET_VFP)
    return (TARGET_BIG_END ? 1 : 0);

  return 1;
}

const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}

/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
         (no argument is ever a candidate for a co-processor
         register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
        {
          if (user_pcs > ARM_PCS_AAPCS_LOCAL)
            sorry ("non-AAPCS derived PCS variant");
          else if (base_rules && user_pcs != ARM_PCS_AAPCS)
            error ("variadic functions must use the base AAPCS variant");
        }

      if (base_rules)
        return ARM_PCS_AAPCS;
      else if (user_convention)
        return user_pcs;
      else if (decl && flag_unit_at_a_time)
        {
          /* Local functions never leak outside this compilation unit,
             so we are free to use whatever conventions are
             appropriate.  */
          /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
          struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
          if (i && i->local)
            return ARM_PCS_AAPCS_LOCAL;
        }
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}

static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
                    const_tree fntype ATTRIBUTE_UNUSED,
                    rtx libcall ATTRIBUTE_UNUSED,
                    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types.  */
        if (!COMPLETE_TYPE_P(type))
          return -1;

        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !host_integerp (TYPE_MAX_VALUE (index), 1)
            || !TYPE_MIN_VALUE (index)
            || !host_integerp (TYPE_MIN_VALUE (index), 1)
            || count < 0)
          return -1;

        count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
                  - tree_low_cst (TYPE_MIN_VALUE (index), 1));

        /* There must be no padding.  */
        if (!host_integerp (TYPE_SIZE (type), 1)
            || (tree_low_cst (TYPE_SIZE (type), 1)
                != count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types.  */
        if (!COMPLETE_TYPE_P(type))
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (!host_integerp (TYPE_SIZE (type), 1)
            || (tree_low_cst (TYPE_SIZE (type), 1)
                != count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types.  */
        if (!COMPLETE_TYPE_P(type))
          return -1;

        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (!host_integerp (TYPE_SIZE (type), 1)
            || (tree_low_cst (TYPE_SIZE (type), 1)
                != count * GET_MODE_BITSIZE (*modep)))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}
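
/* Editor's note: illustrative example (not from the original) of a
   homogeneous aggregate as counted above.  For

     struct hfa { float x, y, z; };

   the walk returns 3 with *modep == SFmode, so under the VFP PCS the
   value can live in s0-s2; adding a "double w" member would make the
   element modes differ and the walk would return -1.  */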

/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}

static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
                                       enum machine_mode mode, const_tree type,
                                       enum machine_mode *base_mode, int *count)
{
  enum machine_mode new_mode = VOIDmode;

  if (GET_MODE_CLASS (mode) == MODE_FLOAT
      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else if (type && (mode == BLKmode || TREE_CODE (type) == VECTOR_TYPE))
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
        *count = ag_count;
      else
        return false;
    }
  else
    return false;

  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;
  return true;
}

static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
                               enum machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  enum machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                                &ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                             const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
                                                &pcum->aapcs_vfp_rmode,
                                                &pcum->aapcs_vfp_rcount);
}
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                    const_tree type ATTRIBUTE_UNUSED)
{
  int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
        pcum->aapcs_vfp_reg_alloc = mask << regno;
        if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
          {
            int i;
            int rcount = pcum->aapcs_vfp_rcount;
            int rshift = shift;
            enum machine_mode rmode = pcum->aapcs_vfp_rmode;
            rtx par;
            if (!TARGET_NEON)
              {
                /* Avoid using unsupported vector modes.  */
                if (rmode == V2SImode)
                  rmode = DImode;
                else if (rmode == V4SImode)
                  {
                    rmode = DImode;
                    rcount *= 2;
                    rshift /= 2;
                  }
              }
            par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
            for (i = 0; i < rcount; i++)
              {
                rtx tmp = gen_rtx_REG (rmode,
                                       FIRST_VFP_REGNUM + regno + i * rshift);
                tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                         GEN_INT (i * GET_MODE_SIZE (rmode)));
                XVECEXP (par, 0, i) = tmp;
              }

            pcum->aapcs_reg = par;
          }
        else
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
        return true;
      }
  return false;
}
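/* Illustrative note, not part of the original source: the allocator
   above views s0-s15 as a bitmask held in aapcs_vfp_regs_free.  For two
   DFmode values, shift == 2 and mask == 0xF, so the scan finds the
   lowest even s-register number where four consecutive bits are still
   free.  Because the scan always restarts from regno 0, registers freed
   earlier are back-filled, as the VFP AAPCS requires.  */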
static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
                               enum machine_mode mode,
                               const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
    {
      int count;
      enum machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
                                             &ag_mode, &count);

      if (!TARGET_NEON)
        {
          if (ag_mode == V2SImode)
            ag_mode = DImode;
          else if (ag_mode == V4SImode)
            {
              ag_mode = DImode;
              count *= 2;
            }
        }

      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
        {
          rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
          tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
          XVECEXP (par, 0, i) = tmp;
        }

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}
static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
                   enum machine_mode mode ATTRIBUTE_UNUSED,
                   const_tree type ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}
#define AAPCS_CP(X)                             \
  {                                             \
    aapcs_ ## X ## _cum_init,                   \
    aapcs_ ## X ## _is_call_candidate,          \
    aapcs_ ## X ## _allocate,                   \
    aapcs_ ## X ## _is_return_candidate,        \
    aapcs_ ## X ## _allocate_return_reg,        \
    aapcs_ ## X ## _advance                     \
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is
     BLKmode) can be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, enum machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a
     call, this routine must not fail and will only be called if
     is_return_candidate returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, enum machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, enum machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP
static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                          const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
                                                        TYPE_MODE (type),
                                                        type))
          return i;
    }
  return -1;
}
static rtx
aapcs_allocate_return_reg (enum machine_mode mode, const_tree type,
                           const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
        {
          fndecl = fntype;
          fntype = TREE_TYPE (fntype);
        }

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
        if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
                                                        type))
          return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
                                                             mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
        }
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

rtx
aapcs_libcall_value (enum machine_mode mode)
{
  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}
/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
         preparation phase, these are handled elsewhere in the
         compiler.  */

      if (slot >= 0)
        {
          /* A Co-processor register candidate goes either in its own
             class of registers or on the stack.  */
          if (!pcum->aapcs_cprc_failed[slot])
            {
              /* C1.cp - Try to allocate the argument to co-processor
                 registers.  */
              if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
                return;

              /* C2.cp - Put the argument on the stack and note that we
                 can't assign any more candidates in this slot.  We also
                 need to note that we have allocated stack space, so that
                 we won't later try to split a non-cprc candidate between
                 core registers and the stack.  */
              pcum->aapcs_cprc_failed[slot] = true;
              pcum->can_split = false;
            }

          /* We didn't get a register, so this argument goes on the
             stack.  */
          gcc_assert (pcum->can_split == false);
          return;
        }
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
    ncrn++;

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
  return;
}
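/* Illustrative note, not part of the original source: for a prototype
   such as

       void f (int i, double d);

   under the base AAPCS rules above, I lands in r0 (C4); rule C3 then
   rounds the NCRN from 1 up to 2 for the doubleword-aligned D, which
   takes r2/r3.  A 16-byte struct in D's position would instead hit C5:
   r2/r3 carry the first half and the remainder goes on the stack.  */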
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}
/* Return true if mode/type need doubleword alignment.  */
static bool
arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
{
  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
}
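/* Illustrative note, not part of the original source: PARM_BOUNDARY is
   32 on ARM, so 64-bit-aligned entities (DImode, DFmode, or any type
   carrying __attribute__((aligned (8)))) answer true here and are then
   steered to an even core-register pair (rule C3) or an 8-byte-aligned
   stack slot.  */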
/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                  const_tree type, bool named)
{
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (mode == VOIDmode)
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if (pcum->nregs & 1
      && ARM_DOUBLEWORD_ALIGN
      && arm_needs_doubleword_align (mode, type))
    pcum->nregs++;

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (mode, type);

  if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (mode, pcum->nregs);
}
static unsigned int
arm_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
          ? DOUBLEWORD_ALIGNMENT
          : PARM_BOUNDARY);
}
static int
arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                       tree type, bool named)
{
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}
/* Update the data in PCUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

static void
arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
                          const_tree type, bool named)
{
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, mode, type, named);

      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
                                                              type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (mode, type);
    }
}
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,    /* No #pragma [no_]long_calls is in effect.  */
  LONG,   /* #pragma long_calls is in effect.  */
  SHORT   /* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}
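/* Illustrative note, not part of the original source: these handlers
   are registered as target pragmas (see REGISTER_TARGET_PRAGMAS in the
   ARM headers), so user code can toggle the state, e.g.

       #pragma long_calls
       void far_away (void);      -- gets an implicit long_call attribute
       #pragma long_calls_off

   The recorded state is consumed by arm_set_default_type_attributes()
   below.  */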
/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
                             int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
                          bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
        {
          warning (OPT_Wattributes, "%qE attribute only applies to functions",
                   name);
          *no_add_attrs = true;
        }
      /* FIXME: the argument if any is checked for type attributes;
         should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
          || TREE_CODE (*node) == METHOD_TYPE)
        {
          if (arm_isr_value (args) == ARM_FT_UNKNOWN)
            {
              warning (OPT_Wattributes, "%qE attribute ignored",
                       name);
              *no_add_attrs = true;
            }
        }
      else if (TREE_CODE (*node) == POINTER_TYPE
               && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
                   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
               && arm_isr_value (args) != ARM_FT_UNKNOWN)
        {
          *node = build_variant_type_copy (*node);
          TREE_TYPE (*node) = build_type_attribute_variant
            (TREE_TYPE (*node),
             tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
          *no_add_attrs = true;
        }
      else
        {
          /* Possibly pass this attribute on from the type to a decl.  */
          if (flags & ((int) ATTR_FLAG_DECL_NEXT
                       | (int) ATTR_FLAG_FUNCTION_NEXT
                       | (int) ATTR_FLAG_ARRAY_NEXT))
            {
              *no_add_attrs = true;
              return tree_cons (name, args, NULL_TREE);
            }
          else
            warning (OPT_Wattributes, "%qE attribute ignored",
                     name);
        }
    }

  return NULL_TREE;
}
/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
                          int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}
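/* Illustrative note, not part of the original source: the "pcs"
   attribute selects the procedure-call variant per function type, e.g.

       double dot (double, double) __attribute__((pcs ("aapcs-vfp")));

   arm_pcs_from_attribute() maps the string argument onto enum arm_pcs;
   any unrecognized argument is diagnosed and dropped above.  */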
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
                                tree name ATTRIBUTE_UNUSED,
                                tree args ATTRIBUTE_UNUSED,
                                int flags ATTRIBUTE_UNUSED,
                                bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
        return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
        return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}
/* Assigns default attributes to newly defined type.  This is used to
   set short_call/long_call attributes for function types of
   functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
        attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
        attr_name = get_identifier ("short_call");
      else
        return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}
/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about functions defined in the same
     compilation unit.  */
  if (!TREE_STATIC (decl))
    return false;

  /* Make sure that SYMBOL always binds to the definition in this
     compilation unit.  */
  if (!targetm.binds_local_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_ONE_ONLY (decl))
        return false;
    }

  return function_section (decl) == section;
}
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__ ((long_call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}
/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  /* Never tailcall something for which we have no decl, or if we
     are generating code for Thumb-1.  */
  if (decl == NULL || TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
    return false;

  /* Cannot tail-call to long calls, since these are out of range of
     a branch instruction.  */
  if (arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
         example that we aren't returning a value from the sibling in
         a VFP register but then need to transfer it to a core
         register.  */
      rtx a, b;

      a = arm_function_value (TREE_TYPE (exp), decl, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
                              cfun->decl, false);
      if (!rtx_equal_p (a, b))
        return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* Everything else is ok.  */
  return true;
}
/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}
/* Record that the current function needs a PIC register.  Initialize
   cfun->machine->pic_reg if we have not already done so.  */

static void
require_pic_register (void)
{
  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table)
    {
      gcc_assert (can_create_pseudo_p ());
      if (arm_pic_register != INVALID_REGNUM)
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx seq, insn;

          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_reg_rtx (Pmode);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              start_sequence ();

              arm_load_pic_register (0UL);

              seq = get_insns ();
              end_sequence ();

              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATOR (insn) = prologue_locator;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
            }
        }
    }
}
rtx
legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      rtx insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF &&
               SYMBOL_REF_LOCAL_P (orig)))
          && NEED_GOT_RELOC
          && !TARGET_VXWORKS_RTP)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          rtx pat;
          rtx mem;

          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register ();

          pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);

          /* Make the MEM as close to a constant as possible.  */
          mem = SET_SRC (pat);
          gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
          MEM_READONLY_P (mem) = 1;
          MEM_NOTRAP_P (mem) = 1;

          insn = emit_insn (pat);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (GET_CODE (XEXP (XEXP (orig, 0), 1)) == CONST_INT);
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg);

      if (GET_CODE (offset) == CONST_INT)
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          if (GET_CODE (offset) == CONST_INT)
            return plus_constant (base, INTVAL (offset));
        }

      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}
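/* Illustrative note, not part of the original source: for a global
   symbol the GOT path above emits roughly

       ldr  rD, .LC0            @ GOT offset of the symbol
       ldr  rD, [rPIC, rD]      @ symbol address from its GOT slot

   while local symbols and labels take the arm_pic_static_addr() path
   and are formed PC-relatively with no GOT load at all.  */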
/* Find a spare register to use during the prologue of a function.  */

static unsigned long
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_ARG_REGNUM; reg >= 0; reg--)
    if (!df_regs_ever_live_p (reg))
      return reg;

  /* Before going on to check the call-saved registers we can try a couple
     more ways of deducing that r3 is available.  The first is when we are
     pushing anonymous arguments onto the stack and we have less than 4
     registers worth of fixed arguments(*).  In this case r3 will be part of
     the variable argument list and so we can be sure that it will be
     pushed right at the start of the function.  Hence it will be available
     for the rest of the prologue.
     (*): ie crtl->args.pretend_args_size is greater than 0.  */
  if (cfun->machine->uses_anonymous_args
      && crtl->args.pretend_args_size > 0)
    return LAST_ARG_REGNUM;

  /* The other case is when we have fixed arguments but less than 4 registers
     worth.  In this case r3 might be used in the body of the function, but
     it is not being used to convey an argument into the function.  In theory
     we could just check crtl->args.size to see how many bytes are
     being passed in argument registers, but it seems that it is unreliable.
     Sometimes it will have the value 0 when in fact arguments are being
     passed.  (See testcase execute/20021111-1.c for an example).  So we also
     check the args_info.nregs field as well.  The problem with this field is
     that it makes no allowances for arguments that are passed to the
     function but which are not used.  Hence we could miss an opportunity
     when a function has an unused argument in r3.  But it is better to be
     safe than to be sorry.  */
  if (! cfun->machine->uses_anonymous_args
      && crtl->args.size >= 0
      && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
      && crtl->args.info.nregs < 4)
    return LAST_ARG_REGNUM;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg--)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg++)
        if (pushed_regs_mask & (1 << reg))
          return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}
static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
{
  rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;

  if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
    return;

  gcc_assert (flag_pic);

  pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
          if (TARGET_ARM)
            emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
          else
            emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
            }
          else
            emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
          emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}
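/* Illustrative note, not part of the original source: in ARM state the
   sequence built above amounts to

       ldr   rPIC, .LPIC_OFF     @ _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)
   .LPIC0:
       add   rPIC, pc, rPIC

   which is why the constant is biased by 8 in ARM state and by 4 in
   Thumb state ('dot + 8' versus 'dot + 4' above).  */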
5406 /* Need to emit this whether or not we obey regdecls,
5407 since setjmp/longjmp can cause life info to screw up. */
5411 /* Generate code to load the address of a static var when flag_pic is set. */
5413 arm_pic_static_addr (rtx orig
, rtx reg
)
5415 rtx l1
, labelno
, offset_rtx
, insn
;
5417 gcc_assert (flag_pic
);
5419 /* We use an UNSPEC rather than a LABEL_REF because this label
5420 never appears in the code stream. */
5421 labelno
= GEN_INT (pic_labelno
++);
5422 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5423 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5425 /* On the ARM the PC register contains 'dot + 8' at the time of the
5426 addition, on the Thumb it is 'dot + 4'. */
5427 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5428 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5429 UNSPEC_SYMBOL_OFFSET
);
5430 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5434 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5436 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5438 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5440 else /* TARGET_THUMB1 */
5442 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5443 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (GET_CODE (x) != REG)
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
}
/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && GET_CODE (addend) == REG)
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((GET_CODE(xop1) == CONST_INT
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
              && (mode == DImode
                  || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (GET_CODE (addend) != CONST_INT)
        return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
      && (mode == SFmode || mode == DFmode
          || (TARGET_MAVERICK && mode == DImode)))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && GET_CODE (op) == CONST_INT
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}
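/* Illustrative note, not part of the original source: the ranges checked
   above correspond to instruction forms such as

       ldr   r0, [r1, #4095]        @ word/byte: 12-bit immediate
       ldrh  r0, [r1, #255]         @ halfword on ARMv4+: 8-bit immediate
       ldr   r0, [r1, r2, lsl #2]   @ scaled register index

   with the tighter multiple-of-4 limits up to +/-1020 applying to LDRD,
   VFP and NEON accesses.  */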
/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (GET_CODE(op) != CONST_INT)
    return false;

  val = INTVAL(op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}
/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
      && (mode == SFmode || mode == DFmode
          || (TARGET_MAVERICK && mode == DImode)))
    return (code == CONST_INT && INTVAL (index) < 1024
            /* Thumb-2 allows only > -256 index range for its core register
               load/stores.  Since we allow SF/DF in core registers, we have
               to use the intersection between -256~4096 (core) and -1024~1024
               (coprocessor).  */
            && INTVAL (index) > -256
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
         and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
        return (code == CONST_INT
                && INTVAL (index) < 1024
                && INTVAL (index) > -1024
                && (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);
          /* ??? Can we assume ldrd for thumb2?  */
          /* Thumb-2 ldrd only has reg+const addressing modes.  */
          /* ldrd supports offsets of +-1020.
             However the ldr fallback does not.  */
          return val > -256 && val < 256 && (val & 3) == 0;
        }
      else
        return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
               && thumb2_index_mul_operand (xiop1))
              || (arm_address_register_rtx_p (xiop1, strict_p)
                  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
              && GET_CODE (op) == CONST_INT
              && INTVAL (op) > 0
              && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
          && INTVAL (index) < 4096
          && INTVAL (index) > -256);
}
/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
{
  int regno;

  if (GET_CODE (x) != REG)
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
          || regno > LAST_VIRTUAL_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || (GET_MODE_SIZE (mode) >= 4
              && (regno == STACK_POINTER_REGNUM
                  || regno >= FIRST_PSEUDO_REGISTER
                  || x == hard_frame_pointer_rtx
                  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}
/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
{
  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && GET_CODE (XEXP (x, 1)) == CONST_INT
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (GET_CODE (XEXP (x, 0)) == REG
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && GET_CODE (XEXP (x, 1)) == CONST_INT
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      else if (GET_CODE (XEXP (x, 0)) == REG
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && GET_CODE (XEXP (x, 1)) == CONST_INT
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
bool
thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}
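/* Illustrative note, not part of the original source: these ranges are
   exactly the 16-bit Thumb-1 immediate-offset encodings, a 5-bit field
   scaled by the access size:

       ldrb/strb   #0..31    (x1)
       ldrh/strh   #0..62    (x2)
       ldr/str     #0..124   (x4)  */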
static bool
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

static rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
    }
  return target;
}
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}

static rtx
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx insns, label, labelno, sum;

  start_sequence ();

  labelno = GEN_INT (pic_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  label = gen_rtx_CONST (VOIDmode, label);

  sum = gen_rtx_UNSPEC (Pmode,
                        gen_rtvec (4, x, GEN_INT (reloc), label,
                                   GEN_INT (TARGET_ARM ? 8 : 4)),
                        UNSPEC_TLS);
  reg = load_tls_operand (sum, reg);

  if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else if (TARGET_THUMB2)
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
  else /* TARGET_THUMB1 */
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST?  */
                                     Pmode, 1, reg, Pmode);

  insns = get_insns ();
  end_sequence ();

  return insns;
}
static rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
      dest = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, dest, ret, x);
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

      /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
         share the LDM result with other LD model accesses.  */
      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                            UNSPEC_TLS);
      dest = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, dest, ret, eqv);

      /* Load the addend.  */
      addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
                               UNSPEC_TLS);
      addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
      return gen_rtx_PLUS (Pmode, dest, addend);

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
      reg = load_tls_operand (sum, reg);

      if (TARGET_ARM)
        emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
      else if (TARGET_THUMB2)
        emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
      else
        {
          emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
          emit_move_insn (reg, gen_const_mem (SImode, reg));
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      gcc_unreachable ();
    }
}
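
/* Added summary note: the four TLS models above reduce to two shapes.
   GLOBAL_DYNAMIC and LOCAL_DYNAMIC call __tls_get_addr and use the returned
   pointer (LOCAL_DYNAMIC then adds a link-time-constant per-symbol addend),
   while INITIAL_EXEC and LOCAL_EXEC compute thread-pointer + offset
   directly, loading the offset from a GOT slot or folding it in as a
   constant respectively.  */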
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (!TARGET_ARM)
    {
      /* TODO: legitimize_address for Thumb2.  */
      if (TARGET_THUMB2)
        return x;
      return thumb_legitimize_address (x, orig_x, mode);
    }

  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);
  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && GET_CODE (xop1) == CONST_INT)
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode
              || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }
  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow the minipool to be avoided entirely.  */
  else if (GET_CODE (x) == CONST_INT && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
         use an 8-bit index.  So let's use a 12-bit index for SImode only and
         hope that arm_gen_constant will enable ldrb to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (bit_count (base & 0xffffffff) > (32 - bits) / 2)
        {
          /* It'll most probably be more efficient to generate the base
             with more bits set and use a negative index instead.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (base_reg, index);
    }
  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
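
/* Illustrative sketch (added; not part of the compiler): the base/index
   split applied to absolute addresses above.  For SImode, bits = 12, so
   e.g. 0x12345 splits into base 0x12000 (materialized once in a register)
   and index 0x345 (folded into each load/store offset).  */
static inline void
split_abs_address_sketch (unsigned long addr, unsigned int bits,
                          unsigned long *base, unsigned long *index)
{
  unsigned long mask = (1UL << bits) - 1;

  *base = addr & ~mask;         /* Shared base register.  */
  *index = addr & mask;         /* Per-access immediate offset.  */
}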
/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
{
  if (arm_tls_symbol_p (x))
    return legitimize_tls_address (x, NULL_RTX);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }
  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);

      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}
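
/* Worked example (added commentary): for an SImode access at base + 260
   when optimizing for size, the code above picks
   delta = 260 - (256 - 4) = 8, biases the base by 252 with one add, and
   leaves an offset of 8 that fits the ldr/str immediate field.  */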
bool
thumb_legitimize_reload_address (rtx *x_p,
                                 enum machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  if (GET_CODE (x) == PLUS
      && GET_MODE_SIZE (mode) < 4
      && REG_P (XEXP (x, 0))
      && XEXP (x, 0) == stack_pointer_rtx
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  /* If both registers are hi-regs, then it's better to reload the
     entire expression rather than each register individually.  That
     only requires one reload register rather than two.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REG_P (XEXP (x, 1))
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
      && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
    {
      rtx orig_x = x;

      x = copy_rtx (x);
      push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
                   Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type);
      return true;
    }

  return false;
}
/* Test for various thread-local symbols.  */

/* Return TRUE if X is a thread-local symbol.  */

static bool
arm_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Helper for arm_tls_referenced_p.  */

static int
arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (rtx x)
{
  rtx base, offset;

  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
    {
      split_const (x, &base, &offset);
      if (GET_CODE (base) == SYMBOL_REF
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  return arm_tls_referenced_p (x);
}
#define REG_OR_SUBREG_REG(X) \
  (GET_CODE (X) == REG \
   || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))

#define REG_OR_SUBREG_RTX(X) \
  (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);
  int total;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      return (COSTS_N_INSNS (1)
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
                     + GET_CODE (SET_DEST (x)) == MEM));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}
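
/* Illustrative sketch (added; not compiler code): the AND-mask test
   duplicated from the andsi3 expander above.  Masks of the form
   (1 << i) - 1, or their complements, can be built with a two-shift
   sequence instead of loading a literal, hence the cheaper cost.  */
static inline int
thumb1_shift_mask_sketch (long long val)
{
  int i;

  for (i = 9; i <= 31; i++)
    if ((1LL << i) - 1 == val || (1LL << i) - 1 == ~val)
      return 1;   /* An lsl/lsr (or lsr/lsl) pair can build this mask.  */
  return 0;
}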
static bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);
  enum rtx_code subcode;
  rtx operand;
  enum rtx_code code = GET_CODE (x);
  *total = 0;

  switch (code)
    {
    case MEM:
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
      return true;

    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      if (TARGET_HARD_FLOAT && mode == SFmode)
        *total = COSTS_N_INSNS (2);
      else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
        *total = COSTS_N_INSNS (4);
      else
        *total = COSTS_N_INSNS (20);
      return false;
    case ROTATE:
      if (GET_CODE (XEXP (x, 1)) == REG)
        *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
      else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
        *total = rtx_cost (XEXP (x, 1), code, speed);

      /* Fall through */
    case ROTATERT:
      if (mode != SImode)
        {
          *total += COSTS_N_INSNS (4);
          return true;
        }

      /* Fall through */
    case ASHIFT: case LSHIFTRT: case ASHIFTRT:
      *total += rtx_cost (XEXP (x, 0), code, speed);
      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (3);
          return true;
        }

      *total += COSTS_N_INSNS (1);
      /* Increase the cost of complex shifts because they aren't any faster,
         and reduce dual issue opportunities.  */
      if (arm_tune_cortex_a9
          && outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
        ++*total;

      return true;
    case MINUS:
      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
          if (GET_CODE (XEXP (x, 0)) == CONST_INT
              && const_ok_for_arm (INTVAL (XEXP (x, 0))))
            {
              *total += rtx_cost (XEXP (x, 1), code, speed);
              return true;
            }

          if (GET_CODE (XEXP (x, 1)) == CONST_INT
              && const_ok_for_arm (INTVAL (XEXP (x, 1))))
            {
              *total += rtx_cost (XEXP (x, 0), code, speed);
              return true;
            }

          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 0)))
                {
                  *total += rtx_cost (XEXP (x, 1), code, speed);
                  return true;
                }

              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, speed);
                  return true;
                }

              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }

      *total = COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 0)) == CONST_INT
          && const_ok_for_arm (INTVAL (XEXP (x, 0))))
        {
          *total += rtx_cost (XEXP (x, 1), code, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 1));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 0), code, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
          return true;
        }

      /* A shift as a part of RSB costs no more than RSB itself.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, speed);
          *total += rtx_cost (XEXP (x, 1), code, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 0), code, speed);
          *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
          return true;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
          if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
              && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Fall through */
    case PLUS:
      if (code == PLUS && arm_arch6 && mode == SImode
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
                              speed);
          *total += rtx_cost (XEXP (x, 1), code, speed);
          return true;
        }

      /* MLA: All arguments must be registers.  We filter out
         multiplication by a power of two, so that we fall down into
         the code below.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        /* The cost comes from the cost of the multiply.  */
        return false;

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
                  && arm_const_double_rtx (XEXP (x, 1)))
                {
                  *total += rtx_cost (XEXP (x, 0), code, speed);
                  return true;
                }

              return false;
            }

          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
          || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
          if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
              && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
            *total += COSTS_N_INSNS (1);
          return true;
        }

      /* Fall through */
    case AND: case XOR: case IOR:

      /* Normally the frame registers will be spilt into reg+const during
         reload, so it is a bad idea to combine them with other instructions,
         since then they might not be moved outside of loops.  As a compromise
         we allow integration with ops that have a constant as their second
         operand.  */
      if (REG_OR_SUBREG_REG (XEXP (x, 0))
          && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
          && GET_CODE (XEXP (x, 1)) != CONST_INT)
        *total = COSTS_N_INSNS (1);

      if (mode == DImode)
        {
          *total += COSTS_N_INSNS (2);
          if (GET_CODE (XEXP (x, 1)) == CONST_INT
              && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
            {
              *total += rtx_cost (XEXP (x, 0), code, speed);
              return true;
            }

          return false;
        }

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, speed);
          return true;
        }
      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
          return true;
        }

      if (subcode == UMIN || subcode == UMAX
          || subcode == SMIN || subcode == SMAX)
        {
          *total = COSTS_N_INSNS (3);
          return true;
        }

      return false;
    case MULT:
      /* This should have been handled by the CPU specific routines.  */
      gcc_unreachable ();

    case TRUNCATE:
      if (arm_arch3m && mode == SImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
              == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
          && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
        {
          *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
          return true;
        }
      *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
      return false;
    case NEG:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (2);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      if (mode == SImode && code == NOT)
        {
          subcode = GET_CODE (XEXP (x, 0));
          if (subcode == ASHIFT || subcode == ASHIFTRT
              || subcode == LSHIFTRT
              || subcode == ROTATE || subcode == ROTATERT
              || (subcode == MULT
                  && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
            {
              *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
              /* Register shifts cost an extra cycle.  */
              if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
                *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
                                                        subcode, speed);
              return true;
            }
        }

      return false;
    case IF_THEN_ELSE:
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        {
          *total = COSTS_N_INSNS (4);
          return true;
        }

      operand = XEXP (x, 0);

      if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
             || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
            && GET_CODE (XEXP (operand, 0)) == REG
            && REGNO (XEXP (operand, 0)) == CC_REGNUM))
        *total += COSTS_N_INSNS (1);
      *total += (rtx_cost (XEXP (x, 1), code, speed)
                 + rtx_cost (XEXP (x, 2), code, speed));
      return true;
    case NE:
      if (mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
          return true;
        }
      goto scc_insn;

    case GE:
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
          return true;
        }
      goto scc_insn;

    case LT:
      if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
          && mode == SImode && XEXP (x, 1) == const0_rtx)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
          return true;
        }
      goto scc_insn;
    case EQ:
    case GT:
    case LE:
    case GEU:
    case GTU:
    case LEU:
    case LTU:
    scc_insn:
      /* SCC insns.  In the case where the comparison has already been
         performed, then they cost 2 instructions.  Otherwise they need
         an additional comparison before them.  */
      *total = COSTS_N_INSNS (2);
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
        return true;

      /* Fall through */
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
        {
          *total = 0;
          return true;
        }

      *total += COSTS_N_INSNS (1);
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
          && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
        {
          *total += rtx_cost (XEXP (x, 0), code, speed);
          return true;
        }

      subcode = GET_CODE (XEXP (x, 0));
      if (subcode == ASHIFT || subcode == ASHIFTRT
          || subcode == LSHIFTRT
          || subcode == ROTATE || subcode == ROTATERT)
        {
          *total += rtx_cost (XEXP (x, 1), code, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
          return true;
        }

      if (subcode == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total += rtx_cost (XEXP (x, 1), code, speed);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
          return true;
        }

      return false;
    case UMIN:
    case UMAX:
    case SMIN:
    case SMAX:
      *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
      if (GET_CODE (XEXP (x, 1)) != CONST_INT
          || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
        *total += rtx_cost (XEXP (x, 1), code, speed);
      return true;
    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
          *total = COSTS_N_INSNS (20);
          return false;
        }
      *total = COSTS_N_INSNS (1);
      if (mode == DImode)
        *total += COSTS_N_INSNS (3);
      return false;
    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = 0;
      if (GET_MODE_CLASS (mode) == MODE_INT)
        {
          rtx op = XEXP (x, 0);
          enum machine_mode opmode = GET_MODE (op);

          if (mode == DImode)
            *total += COSTS_N_INSNS (1);

          if (opmode != SImode)
            {
              if (MEM_P (op))
                {
                  /* If !arm_arch4, we use one of the extendhisi2_mem
                     or movhi_bytes patterns for HImode.  For a QImode
                     sign extension, we first zero-extend from memory
                     and then perform a shift sequence.  */
                  if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
                    *total += COSTS_N_INSNS (2);
                }
              else if (arm_arch6)
                *total += COSTS_N_INSNS (1);

              /* We don't have the necessary insn, so we need to perform some
                 other operation.  */
              else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
                /* An and with constant 255.  */
                *total += COSTS_N_INSNS (1);
              else
                /* A shift sequence.  Increase costs slightly to avoid
                   combining two shifts into an extend operation.  */
                *total += COSTS_N_INSNS (2) + 1;
            }

          return false;
        }

      switch (GET_MODE (XEXP (x, 0)))
        {
        case V8QImode:
        case V4HImode:
        case V2SImode:
        case V4QImode:
        case V2HImode:
          *total = COSTS_N_INSNS (1);
          return false;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
      return true;
    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x))
          || const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
                                                  INTVAL (x), NULL_RTX,
                                                  NULL_RTX, 0, 0));
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;

    case HIGH:
      *total = COSTS_N_INSNS (1);
      return true;

    case LO_SUM:
      *total = COSTS_N_INSNS (1);
      *total += rtx_cost (XEXP (x, 0), code, speed);
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (4);
      return true;

    default:
      *total = COSTS_N_INSNS (4);
      return false;
    }
}
/* Estimates the size cost of Thumb-1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need
   finer-grained tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  enum machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          /* The Thumb-1 mul instruction can't operate on a constant; we
             must load it into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
          return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      return (COSTS_N_INSNS (1)
              + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
                     + GET_CODE (SET_DEST (x)) == MEM));

    case CONST_INT:
      if (outer == SET)
        {
          if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
            return COSTS_N_INSNS (1);
          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          return COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
                || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
        case QImode:
          return (1 + (mode == DImode ? 4 : 0)
                  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        case HImode:
          return (4 + (mode == DImode ? 4 : 0)
                  + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        case SImode:
          return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));

        default:
          return 99;
        }

    default:
      return 99;
    }
}
/* RTX costs when optimizing for size.  */
static bool
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                    int *total)
{
  enum machine_mode mode = GET_MODE (x);
  if (TARGET_THUMB1)
    {
      *total = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }
  /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions.  */
  switch (code)
    {
    case MEM:
      /* A memory access costs 1 insn if the mode is small, or the address is
         a single register, otherwise it costs one insn per word.  */
      if (REG_P (XEXP (x, 0)))
        *total = COSTS_N_INSNS (1);
      else if (flag_pic
               && GET_CODE (XEXP (x, 0)) == PLUS
               && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
        /* This will be split into two instructions.
           See arm.md:calculate_pic_address.  */
        *total = COSTS_N_INSNS (2);
      else
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return true;
    case DIV:
    case MOD:
    case UDIV:
    case UMOD:
      /* Needs a libcall, so it costs about this.  */
      *total = COSTS_N_INSNS (2);
      return false;

    case ROTATE:
      if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
        {
          *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, false);
          return true;
        }
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, false);
          return true;
        }
      else if (mode == SImode)
        {
          *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, false);
          /* Slightly disparage register shifts, but not by much.  */
          if (GET_CODE (XEXP (x, 1)) != CONST_INT)
            *total += 1 + rtx_cost (XEXP (x, 1), code, false);
          return true;
        }

      /* Needs a libcall.  */
      *total = COSTS_N_INSNS (2);
      return false;
    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      if (mode == SImode)
        {
          enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
          enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));

          if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
              || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
              || subcode1 == ROTATE || subcode1 == ROTATERT
              || subcode1 == ASHIFT || subcode1 == LSHIFTRT
              || subcode1 == ASHIFTRT)
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }

          *total = COSTS_N_INSNS (1);
          return false;
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;
    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* A shift as a part of ADD costs nothing.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
          && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
        {
          *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
          *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, false);
          *total += rtx_cost (XEXP (x, 1), code, false);
          return true;
        }

      /* Fall through */
    case AND: case XOR: case IOR:
      if (mode == SImode)
        {
          enum rtx_code subcode = GET_CODE (XEXP (x, 0));

          if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
              || subcode == LSHIFTRT || subcode == ASHIFTRT
              || (code == AND && subcode == NOT))
            {
              /* It's just the cost of the two operands.  */
              *total = 0;
              return false;
            }
        }

      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case MULT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        {
          *total = COSTS_N_INSNS (1);
          return false;
        }

      /* Fall through */
    case NOT:
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      return false;
    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode))
        *total = 0;
      else
        *total = COSTS_N_INSNS (1);
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
          && (mode == SFmode || !TARGET_VFP_SINGLE))
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
      return false;
    case SIGN_EXTEND:
    case ZERO_EXTEND:
      return arm_rtx_costs_1 (x, outer_code, total, 0);

    case CONST_INT:
      if (const_ok_for_arm (INTVAL (x)))
        /* A multiplication by a constant requires another instruction
           to load the constant to a register.  */
        *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
                                ? 1 : 0);
      else if (const_ok_for_arm (~INTVAL (x)))
        *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
      else if (const_ok_for_arm (-INTVAL (x)))
        {
          if (outer_code == COMPARE || outer_code == PLUS
              || outer_code == MINUS)
            *total = 0;
          else
            *total = COSTS_N_INSNS (1);
        }
      else
        *total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
         cost of these slightly.  */
      *total = COSTS_N_INSNS (1) + 1;
      return true;

    default:
      if (mode != VOIDmode)
        *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
        *total = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}
/* Dispatch RTX costs to the size- or speed-tuned implementation.  */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int *total,
               bool speed)
{
  if (!speed)
    return arm_size_rtx_costs (x, (enum rtx_code) code,
                               (enum rtx_code) outer_code, total);
  else
    return current_tune->rtx_costs (x, (enum rtx_code) code,
                                    (enum rtx_code) outer_code,
                                    total, speed);
}
/* RTX costs for cores with a slow MUL implementation.  Thumb-2 is not
   supported on any "slowmul" cores, so it can be ignored.  */

static bool
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case MULT:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT
          || mode == DImode)
        {
          *total = COSTS_N_INSNS (20);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 2;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          *total += rtx_cost (XEXP (x, 0), code, speed);
          return true;
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
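
/* Illustrative sketch (added; not compiler code): the per-constant multiply
   cost loop used above and in the fastmul variant.  A core retiring
   booth_unit_size bits of the (32-bit truncated) constant per cycle pays
   one extra cost unit per chunk still outstanding; e.g. i = 0xff with
   booth_unit_size = 2 adds 4.  */
static inline int
booth_mul_cost_sketch (unsigned long long i, int booth_unit_size,
                       int base_cost)
{
  int j;

  i &= 0xffffffffULL;
  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      base_cost++;
    }
  return base_cost;
}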
/* RTX cost for cores with a fast multiply unit (M variants).  */

static bool
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                       int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  /* ??? should thumb2 use different costs?  */
  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
                                      & (unsigned HOST_WIDE_INT) 0xffffffff);
          int cost, const_ok = const_ok_for_arm (i);
          int j, booth_unit_size;

          /* Tune as appropriate.  */
          cost = const_ok ? 4 : 8;
          booth_unit_size = 8;
          for (j = 0; i && j < 32; j += booth_unit_size)
            {
              i >>= booth_unit_size;
              cost++;
            }

          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (4);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      /* Requires a libcall.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX cost for XScale CPUs.  Thumb-2 is not supported on any xscale cores,
   so it can be ignored.  */

static bool
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                      int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB)
    {
      *total = thumb1_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case COMPARE:
      if (GET_CODE (XEXP (x, 0)) != MULT)
        return arm_rtx_costs_1 (x, outer_code, total, speed);

      /* A COMPARE of a MULT is slow on XScale; the muls instruction
         will stall until the multiplication is complete.  */
      *total = COSTS_N_INSNS (3);
      return true;

    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        {
          /* If operand 1 is a constant we can more accurately
             calculate the cost of the multiply.  The multiplier can
             retire 15 bits on the first cycle and a further 12 on the
             second.  We do, of course, have to load the constant into
             a register first.  */
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
          /* There's a general overhead of one cycle.  */
          int cost = 1;
          unsigned HOST_WIDE_INT masked_const;

          if (i & 0x80000000)
            i = ~i;

          i &= (unsigned HOST_WIDE_INT) 0xffffffff;

          masked_const = i & 0xffff8000;
          if (masked_const != 0)
            {
              cost++;
              masked_const = i & 0xf8000000;
              if (masked_const != 0)
                cost++;
            }
          *total = COSTS_N_INSNS (cost);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (3);
          return false;
        }

      /* Requires a libcall.  */
      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* RTX costs for 9e (and later) cores.  */

static bool
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
                  int *total, bool speed)
{
  enum machine_mode mode = GET_MODE (x);

  if (TARGET_THUMB1)
    {
      switch (code)
        {
        case MULT:
          *total = COSTS_N_INSNS (3);
          return true;

        default:
          *total = thumb1_rtx_costs (x, code, outer_code);
          return true;
        }
    }

  switch (code)
    {
    case MULT:
      /* There is no point basing this on the tuning, since it is always the
         fast variant if it exists at all.  */
      if (mode == DImode
          && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
          && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
              || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (mode == DImode)
        {
          *total = COSTS_N_INSNS (5);
          return false;
        }

      if (mode == SImode)
        {
          *total = COSTS_N_INSNS (2);
          return false;
        }

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          if (TARGET_HARD_FLOAT
              && (mode == SFmode
                  || (mode == DFmode && !TARGET_VFP_SINGLE)))
            {
              *total = COSTS_N_INSNS (1);
              return false;
            }
        }

      *total = COSTS_N_INSNS (20);
      return false;

    default:
      return arm_rtx_costs_1 (x, outer_code, total, speed);
    }
}
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
         operand for INSN.  If we have a shifted input operand and the
         instruction we depend on is another ALU instruction, then we may
         have to account for an additional stall.  */
      if (shift_opnum != 0
          && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
        {
          rtx shifted_operand;
          int opno;

          /* Get the shifted operand.  */
          extract_insn (insn);
          shifted_operand = recog_data.operand[shift_opnum];

          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
          extract_insn (dep);
          preprocess_constraints ();
          for (opno = 0; opno < recog_data.n_operands; opno++)
            {
              /* We can ignore strict inputs.  */
              if (recog_data.operand_type[opno] == OP_IN)
                continue;

              if (reg_overlap_mentioned_p (recog_data.operand[opno],
                                           shifted_operand))
                {
                  *cost = 2;
                  return false;
                }
            }
        }
    }
  return true;
}
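
/* Example (added commentary): in a sequence such as

     add r1, r2, r3
     add r4, r5, r1, lsl #2

   the second instruction shifts an operand (r1) produced by the ALU
   instruction it depends on, so the hook above raises the dependency
   cost to account for the extra XScale shifter stall.  */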
/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  switch (REG_NOTE_KIND (link))
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
      if (recog_memoized (insn) >= 0
          && recog_memoized (dep) >= 0)
        {
          if (GET_CODE (PATTERN (insn)) == SET)
            {
              if (GET_MODE_CLASS
                  (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
                  || GET_MODE_CLASS
                  (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
                {
                  enum attr_type attr_type_insn = get_attr_type (insn);
                  enum attr_type attr_type_dep = get_attr_type (dep);

                  /* By default all dependencies of the form
                       s0 = s0 <op> s1
                       s0 = s0 <op> s2
                     have an extra latency of 1 cycle because
                     of the input and output dependency in this
                     case.  However this gets modeled as a true
                     dependency, hence all these checks.  */
                  if (REG_P (SET_DEST (PATTERN (insn)))
                      && REG_P (SET_DEST (PATTERN (dep)))
                      && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
                                                  SET_DEST (PATTERN (dep))))
                    {
                      /* FMACS is a special case where the dependent
                         instruction can be issued 3 cycles before
                         the normal latency in case of an output
                         dependency.  */
                      if ((attr_type_insn == TYPE_FMACS
                           || attr_type_insn == TYPE_FMACD)
                          && (attr_type_dep == TYPE_FMACS
                              || attr_type_dep == TYPE_FMACD))
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) - 3;
                          else
                            *cost = insn_default_latency (dep);
                          return false;
                        }
                      else
                        {
                          if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
                            *cost = insn_default_latency (dep) + 1;
                          else
                            *cost = insn_default_latency (dep);
                        }
                      return false;
                    }
                }
            }
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
{
  /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
     followed by a predicated one) carries a penalty of 3 cycles.  */
  if (REG_NOTE_KIND (link) == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 3;
          return false;
        }
    }

  return true;
}
/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value.  There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function.  Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
{
  rtx i_pat, d_pat;

  /* When generating Thumb-1 code, we want to place flag-setting operations
     close to a conditional branch which depends on them, so that we can
     omit the comparison.  */
  if (TARGET_THUMB1
      && REG_NOTE_KIND (link) == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
        return cost;
    }

  /* XXX This is not strictly true for the FPA.  */
  if (REG_NOTE_KIND (link) == REG_DEP_ANTI
      || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (REG_NOTE_KIND (link) == 0
      && GET_CODE (insn) == CALL_INSN)
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && GET_CODE (SET_SRC (i_pat)) == MEM
      && (d_pat = single_set (dep)) != NULL
      && GET_CODE (SET_DEST (d_pat)) == MEM)
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */
      if ((GET_CODE (src_mem) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (src_mem))
          || reg_mentioned_p (stack_pointer_rtx, src_mem)
          || reg_mentioned_p (frame_pointer_rtx, src_mem)
          || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
        return 1;
    }

  return cost;
}
static int fp_consts_inited = 0;

/* Only zero is valid for VFP.  Other values are also valid for FPA.  */
static const char * const strings_fp[8] =
{
  "0", "1", "2", "3",
  "4", "5", "0.5", "10"
};

static REAL_VALUE_TYPE values_fp[8];

static void
init_fp_table (void)
{
  int i;
  REAL_VALUE_TYPE r;

  if (TARGET_VFP)
    fp_consts_inited = 1;
  else
    fp_consts_inited = 8;

  for (i = 0; i < fp_consts_inited; i++)
    {
      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
      values_fp[i] = r;
    }
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  for (i = 0; i < fp_consts_inited; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
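
/* Added commentary: the REAL_VALUE_MINUS_ZERO check above is needed because
   -0.0 compares equal to 0.0 under REAL_VALUES_EQUAL, yet it is not an
   encodable immediate; without the check it would wrongly match the "0"
   table entry.  */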
/* Return TRUE if rtx X is a valid immediate FPA constant.  */
int
neg_const_double_rtx_ok_for_fpa (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  r = real_value_negate (&r);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  HOST_WIDE_INT m1, m2;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;

  if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
    return -1;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);
  mantissa = m1;
  mant_hi = m2;

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load
     the floating-point immediate zero with Neon using an integer-zero load,
     but that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}
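
/* Illustrative sketch (added; not compiler code): packing a decomposed
   (s, n, r) triple from the formula above into the fconst[sd] immediate.
   E.g. 1.0 = 16 * 2^-4, so s = 0, n = 16, r = 4 and the encoding is
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */
static inline int
vfp3_imm8_sketch (int s, int n, int r)
{
  if (n < 16 || n > 31 || r < 0 || r > 7)
    return -1;                  /* Not a quarter-precision value.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}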
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table,
   or -1 if the given value doesn't match any of the listed patterns.  */
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
  matches = 1; \
  for (i = 0; i < idx; i += (STRIDE)) \
    if (!(TEST)) \
      matches = 0; \
  if (matches) \
    { \
      immtype = (CLASS); \
      elsize = (ELSIZE); \
      break; \
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);
      REAL_VALUE_TYPE r0;

      if (!vfp3_const_double_rtx (el0))
        return -1;

      REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);

      for (i = 1; i < n_elts; i++)
        {
          rtx elt = CONST_VECTOR_ELT (op, i);
          REAL_VALUE_TYPE re;

          REAL_VALUE_FROM_CONST_DOUBLE (re, elt);

          if (!REAL_VALUES_EQUAL (r0, re))
            return -1;
        }

      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      return 18;
    }
8385 for (i
= 0; i
< n_elts
; i
++)
8387 rtx el
= CONST_VECTOR_ELT (op
, i
);
8388 unsigned HOST_WIDE_INT elpart
;
8389 unsigned int part
, parts
;
8391 if (GET_CODE (el
) == CONST_INT
)
8393 elpart
= INTVAL (el
);
8396 else if (GET_CODE (el
) == CONST_DOUBLE
)
8398 elpart
= CONST_DOUBLE_LOW (el
);
8404 for (part
= 0; part
< parts
; part
++)
8407 for (byte
= 0; byte
< innersize
; byte
++)
8409 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8410 elpart
>>= BITS_PER_UNIT
;
8412 if (GET_CODE (el
) == CONST_DOUBLE
)
8413 elpart
= CONST_DOUBLE_HIGH (el
);
8418 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8422 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8423 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8425 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8426 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8428 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8429 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8431 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8432 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
8434 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
8436 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
8438 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8439 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8441 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8442 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8444 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8445 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8447 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8448 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
8450 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
8452 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
8454 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8455 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8457 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8458 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8460 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8461 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8463 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8464 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8466 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
8468 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8469 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
8477 *elementwidth
= elsize
;
8481 unsigned HOST_WIDE_INT imm
= 0;
8483 /* Un-invert bytes of recognized vector, if necessary. */
8485 for (i
= 0; i
< idx
; i
++)
8486 bytes
[i
] ^= invmask
;
8490 /* FIXME: Broken on 32-bit H_W_I hosts. */
8491 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8493 for (i
= 0; i
< 8; i
++)
8494 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8495 << (i
* BITS_PER_UNIT
);
8497 *modconst
= GEN_INT (imm
);
8501 unsigned HOST_WIDE_INT imm
= 0;
8503 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8504 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8506 *modconst
= GEN_INT (imm
);
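
/* Worked example (added commentary): a V4SImode constant with every element
   0xab splats to the byte pattern ab 00 00 00 repeated, matching variant 0
   (a vmov.i32 form).  With every element 0xffffffab the pattern is
   ab ff ff ff, matching variant 6 (a VMVN form); as the table above notes,
   a vmov mnemonic is still output and the assembler picks the real
   encoding.  */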
/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate.  Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
                               rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH.  See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
                                rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
                             int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}
/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go.  Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements.  An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
                      rtx (*reduc) (rtx, rtx, rtx))
{
  enum machine_mode inner = GET_MODE_INNER (mode);
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}
/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (!all_same)
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We can not take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
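
/* Illustrative sketch (not from the original source): the three
   strategies above correspond roughly to

	vmov.i32	q0, #0		@ immediate move, cheapest
	vdup.32		q0, r0		@ splat from a core register
	vldr		d0, .Lpool	@ literal-pool load, last resort

   in decreasing order of preference.  */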
/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (VOIDmode, target,
			      gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      switch (mode)
	{
	case V8QImode:
	  emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
	  break;
	case V16QImode:
	  emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
	  break;
	case V4HImode:
	  emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
	  break;
	case V8HImode:
	  emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
	  break;
	case V2SImode:
	  emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
	  break;
	case V4SImode:
	  emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
	  break;
	case V2SFmode:
	  emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
	  break;
	case V4SFmode:
	  emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
	  break;
	case V2DImode:
	  emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
	  break;
	default:
	  gcc_unreachable ();
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  FIXME: NEON bounds checks occur late in compilation, so
   reported source locations are bogus.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const char *err)
{
  HOST_WIDE_INT lane;

  gcc_assert (GET_CODE (operand) == CONST_INT);

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error (err);
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "lane out of range");
}

/* Bounds-check constants.  */

void
neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, "constant out of range");
}
HOST_WIDE_INT
neon_element_bits (enum machine_mode mode)
{
  if (mode == DImode)
    return GET_MODE_BITSIZE (mode);
  else
    return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
}
/* Predicates for `match_operand' and `match_operator'.  */

/* Return nonzero if OP is a valid Cirrus memory address pattern.  */
int
cirrus_memory_offset (rtx op)
{
  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return 0;

  if (GET_CODE (op) == MEM)
    {
      rtx ind;

      ind = XEXP (op, 0);

      /* Match: (mem (reg)).  */
      if (GET_CODE (ind) == REG)
	return 1;

      /* Match:
	 (mem (plus (reg)
		    (const))).  */
      if (GET_CODE (ind) == PLUS
	  && GET_CODE (XEXP (ind, 0)) == REG
	  && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
	  && GET_CODE (XEXP (ind, 1)) == CONST_INT)
	return 1;
    }

  return 0;
}
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincremment addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
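
/* Illustrative sketch (not from the original source): the addresses
   accepted above cover forms such as

	[rn]			@ (mem (reg))
	[rn], #8		@ post-increment writeback
	[rn, #-1020..#1020]	@ word-aligned immediate offset

   i.e. what a coprocessor load like "vldr d0, [r0, #8]" can encode.  */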
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* FIXME: vld1 allows register post-modify.  */

  /* Match:
     (plus (reg)
	   (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && GET_CODE (XEXP (ind, 0)) == REG
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && GET_CODE (XEXP (ind, 1)) == CONST_INT
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) < 1016
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (GET_CODE (op) != MEM)
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (GET_CODE (ind) == REG)
    return arm_address_register_rtx_p (ind, 0);

  return FALSE;
}
/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}
/* Return GENERAL_REGS if a scratch register is required to reload X to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
	return NO_REGS;
      return GENERAL_REGS;
    }

  if (TARGET_NEON
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
      && neon_vector_mem_operand (x, 0))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}
/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
	  && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE));
}
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code, which has to work around
   a hardware bug triggered by such instructions.  */
static bool
arm_memory_load_p (rtx insn)
{
  rtx body, lhs, rhs;

  if (insn == NULL_RTX || GET_CODE (insn) != INSN)
    return false;

  body = PATTERN (insn);

  if (GET_CODE (body) != SET)
    return false;

  lhs = XEXP (body, 0);
  rhs = XEXP (body, 1);

  lhs = REG_OR_SUBREG_RTX (lhs);

  /* If the destination is not a general purpose
     register we do not have to worry.  */
  if (GET_CODE (lhs) != REG
      || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
    return false;

  /* As well as loads from memory we also have to react
     to loads of invalid constants which will be turned
     into loads from the minipool.  */
  return (GET_CODE (rhs) == MEM
	  || GET_CODE (rhs) == SYMBOL_REF
	  || note_invalid_constants (insn, -1, false));
}
/* Return TRUE if INSN is a Cirrus instruction.  */
static bool
arm_cirrus_insn_p (rtx insn)
{
  enum attr_cirrus attr;

  /* get_attr cannot accept USE or CLOBBER.  */
  if (!insn
      || GET_CODE (insn) != INSN
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return 0;

  attr = get_attr_cirrus (insn);

  return attr != CIRRUS_NOT;
}
/* Cirrus reorg for invalid instruction combinations.  */
static void
cirrus_reorg (rtx first)
{
  enum attr_cirrus attr;
  rtx body = PATTERN (first);
  rtx t;
  int nops;

  /* Any branch must be followed by 2 non Cirrus instructions.  */
  if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
    {
      nops = 0;

      t = next_nonnote_insn (first);

      if (arm_cirrus_insn_p (t))
	++ nops;

      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
	++ nops;

      while (nops --)
	emit_insn_after (gen_nop (), first);

      return;
    }

  /* (float (blah)) is in parallel with a clobber.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (GET_CODE (body) == SET)
    {
      rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);

      /* cfldrd, cfldr64, cfstrd, cfstr64 must
	 be followed by a non Cirrus insn.  */
      if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
	{
	  if (arm_cirrus_insn_p (next_nonnote_insn (first)))
	    emit_insn_after (gen_nop (), first);

	  return;
	}
      else if (arm_memory_load_p (first))
	{
	  unsigned int arm_regno;

	  /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
	     ldr/cfmv64hr combination where the Rd field is the same
	     in both instructions must be split with a non Cirrus
	     insn.  Example:

	     ldr r0, blah
	     nop
	     cfmvsr mvf0, r0.  */

	  /* Get Arm register number for ldr insn.  */
	  if (GET_CODE (lhs) == REG)
	    arm_regno = REGNO (lhs);
	  else
	    {
	      gcc_assert (GET_CODE (rhs) == REG);
	      arm_regno = REGNO (rhs);
	    }

	  /* Next insn.  */
	  first = next_nonnote_insn (first);

	  if (! arm_cirrus_insn_p (first))
	    return;

	  body = PATTERN (first);

	  /* (float (blah)) is in parallel with a clobber.  */
	  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
	    body = XVECEXP (body, 0, 0);

	  if (GET_CODE (body) == FLOAT)
	    body = XEXP (body, 0);

	  if (get_attr_cirrus (first) == CIRRUS_MOVE
	      && GET_CODE (XEXP (body, 1)) == REG
	      && arm_regno == REGNO (XEXP (body, 1)))
	    emit_insn_after (gen_nop (), first);

	  return;
	}
    }

  /* get_attr cannot accept USE or CLOBBER.  */
  if (!first
      || GET_CODE (first) != INSN
      || GET_CODE (PATTERN (first)) == USE
      || GET_CODE (PATTERN (first)) == CLOBBER)
    return;

  attr = get_attr_cirrus (first);

  /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
     must be followed by a non-coprocessor instruction.  */
  if (attr == CIRRUS_COMPARE)
    {
      nops = 0;

      t = next_nonnote_insn (first);

      if (arm_cirrus_insn_p (t))
	++ nops;

      if (arm_cirrus_insn_p (next_nonnote_insn (t)))
	++ nops;

      while (nops --)
	emit_insn_after (gen_nop (), first);

      return;
    }
}
/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS symbol references.  */

int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    default:
      return 0;
    }
}
/* Must not copy any rtx that uses a pc-relative address.  */

static int
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_PIC_BASE)
    return 1;
  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}
/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((GET_CODE (XEXP (a, 0)) == REG
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
      && (GET_CODE (XEXP (b, 0)) == REG
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
/* Return true iff it would be profitable to turn a sequence of NOPS loads
   or stores (depending on IS_STORE) into a load-multiple or store-multiple
   instruction.  ADD_OFFSET is nonzero if the base address register needs
   to be modified with an add instruction before we can use it.  */

static bool
multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
				 int nops, HOST_WIDE_INT add_offset)
{
  /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
     if the offset isn't small enough.  The reason 2 ldrs are faster
     is because these ARMs are able to do more than one cache access
     in a single cycle.  The ARM9 and StrongARM have Harvard caches,
     whilst the ARM8 has a double bandwidth cache.  This means that
     these cores can do both an instruction fetch and a data fetch in
     a single cycle, so the trick of calculating the address into a
     scratch register (one of the result regs) and then doing a load
     multiple actually becomes slower (and no smaller in code size).
     That is the transformation

 	ldr	rd1, [rbase + offset]
 	ldr	rd2, [rbase + offset + 4]

     to

 	add	rd1, rbase, offset
 	ldmia	rd1, {rd1, rd2}

     produces worse code -- '3 cycles + any stalls on rd2' instead of
     '2 cycles + any stalls on rd2'.  On ARMs with only one cache
     access per cycle, the first sequence could never complete in less
     than 6 cycles, whereas the ldm sequence would only take 5 and
     would make better use of sequential accesses if not hitting the
     cache.

     We cheat here and test 'arm_ld_sched' which we currently know to
     only be true for the ARM8, ARM9 and StrongARM.  If this ever
     changes, then the test below needs to be reworked.  */
  if (nops == 2 && arm_ld_sched && add_offset != 0)
    return false;

  /* XScale has load-store double instructions, but they have stricter
     alignment requirements than load-store multiple, so we cannot
     use them.

     For XScale ldm requires 2 + NREGS cycles to complete and blocks
     the pipeline until completion.

	NREGS		CYCLES
	  1		  3
	  2		  4
	  3		  5
	  4		  6

     An ldr instruction takes 1-3 cycles, but does not block the
     pipeline.

	NREGS		CYCLES
	  1		 1-3
	  2		 2-6
	  3		 3-9
	  4		 4-12

     Best case ldr will always win.  However, the more ldr instructions
     we issue, the less likely we are to be able to schedule them well.
     Using ldr instructions also increases code size.

     As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
     for counts of 3 or 4 regs.  */
  if (nops <= 2 && arm_tune_xscale && !optimize_size)
    return false;

  return true;
}
/* Subroutine of load_multiple_sequence and store_multiple_sequence.
   Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
   an array ORDER which describes the sequence to use when accessing the
   offsets that produces an ascending order.  In this sequence, each
   offset must be larger by exactly 4 than the previous one.  ORDER[0]
   must have been filled in with the lowest offset by the caller.
   If UNSORTED_REGS is nonnull, it is an array of register numbers that
   we use to verify that ORDER produces an ascending order of registers.
   Return true if it was possible to construct such an order, false if
   not.  */

static bool
compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
		      int *unsorted_regs)
{
  int i;
  for (i = 1; i < nops; i++)
    {
      int j;

      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
	if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
	  {
	    /* We must find exactly one offset that is higher than the
	       previous one by 4.  */
	    if (order[i] != order[i - 1])
	      return false;
	    order[i] = j;
	  }
      if (order[i] == order[i - 1])
	return false;
      /* The register numbers must be ascending.  */
      if (unsorted_regs != NULL
	  && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
	return false;
    }
  return true;
}
/* Used to determine in a peephole whether a sequence of load
   instructions can be changed into a load-multiple instruction.
   NOPS is the number of separate load instructions we are examining.  The
   first NOPS entries in OPERANDS are the destination registers, the
   next NOPS entries are memory operands.  If this function is
   successful, *BASE is set to the common base register of the memory
   accesses; *LOAD_OFFSET is set to the first memory location's offset
   from that base register.
   REGS is an array filled in with the destination register numbers.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps
   insn numbers to an ascending order of stores.  If CHECK_REGS is true,
   the sequence of registers in REGS matches the loads from ascending memory
   locations, and the function verifies that the register numbers are
   themselves ascending.  If CHECK_REGS is false, the register numbers
   are stored in the order they are found in the operands.  */
static int
load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
			int *base, HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  rtx base_reg_rtx = NULL;
  int base_reg = -1;
  int i, ldm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i));

      gcc_assert (GET_CODE (operands[nops + i]) == MEM);

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
	   || (GET_CODE (reg) == SUBREG
	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   == REG)
		  || (GET_CODE (reg) == SUBREG
		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
		  == CONST_INT)))
	{
	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  unsorted_regs[i] = (GET_CODE (operands[i]) == REG
			      ? REGNO (operands[i])
			      : REGNO (SUBREG_REG (operands[i])));

	  /* If it isn't an integer register, or if it overwrites the
	     base register but isn't the last insn in the list, then
	     we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || unsorted_regs[i] > 14
	      || (i != nops - 1 && unsorted_regs[i] == base_reg))
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	regs[i] = unsorted_regs[check_regs ? order[i] : i];

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    ldm_case = 1; /* ldmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    ldm_case = 2; /* ldmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    ldm_case = 3; /* ldmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    ldm_case = 4; /* ldmdb */
  else if (const_ok_for_arm (unsorted_offsets[order[0]])
	   || const_ok_for_arm (-unsorted_offsets[order[0]]))
    ldm_case = 5;
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops,
					ldm_case == 5
					? unsorted_offsets[order[0]] : 0))
    return 0;

  return ldm_case;
}
/* Used to determine in a peephole whether a sequence of store instructions can
   be changed into a store-multiple instruction.
   NOPS is the number of separate store instructions we are examining.
   NOPS_TOTAL is the total number of instructions recognized by the peephole
   pattern.
   The first NOPS entries in OPERANDS are the source registers, the next
   NOPS entries are memory operands.  If this function is successful, *BASE is
   set to the common base register of the memory accesses; *LOAD_OFFSET is set
   to the first memory location's offset from that base register.  REGS is an
   array filled in with the source register numbers, REG_RTXS (if nonnull) is
   likewise filled with the corresponding rtx's.
   SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn
   numbers to an ascending order of stores.
   If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
   from ascending memory locations, and the function verifies that the register
   numbers are themselves ascending.  If CHECK_REGS is false, the register
   numbers are stored in the order they are found in the operands.  */
static int
store_multiple_sequence (rtx *operands, int nops, int nops_total,
			 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
			 HOST_WIDE_INT *load_offset, bool check_regs)
{
  int unsorted_regs[MAX_LDM_STM_OPS];
  rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
  HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
  int order[MAX_LDM_STM_OPS];
  int base_reg = -1;
  rtx base_reg_rtx = NULL;
  int i, stm_case;

  /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
     easily extended if required.  */
  gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);

  memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));

  /* Loop over the operands and check that the memory references are
     suitable (i.e. immediate offsets from the same base register).  At
     the same time, extract the target register, and the memory
     offsets.  */
  for (i = 0; i < nops; i++)
    {
      rtx reg;
      rtx offset;

      /* Convert a subreg of a mem into the mem itself.  */
      if (GET_CODE (operands[nops + i]) == SUBREG)
	operands[nops + i] = alter_subreg (operands + (nops + i));

      gcc_assert (GET_CODE (operands[nops + i]) == MEM);

      /* Don't reorder volatile memory references; it doesn't seem worth
	 looking for the case where the order is ok anyway.  */
      if (MEM_VOLATILE_P (operands[nops + i]))
	return 0;

      offset = const0_rtx;

      if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
	   || (GET_CODE (reg) == SUBREG
	       && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	  || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
	      && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
		   == REG)
		  || (GET_CODE (reg) == SUBREG
		      && GET_CODE (reg = SUBREG_REG (reg)) == REG))
	      && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
		  == CONST_INT)))
	{
	  unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
				  ? operands[i] : SUBREG_REG (operands[i]));
	  unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);

	  if (i == 0)
	    {
	      base_reg = REGNO (reg);
	      base_reg_rtx = reg;
	      if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
		return 0;
	    }
	  else if (base_reg != (int) REGNO (reg))
	    /* Not addressed from the same base register.  */
	    return 0;

	  /* If it isn't an integer register, then we can't do this.  */
	  if (unsorted_regs[i] < 0
	      || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
	      || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
	      || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
	      || unsorted_regs[i] > 14)
	    return 0;

	  unsorted_offsets[i] = INTVAL (offset);
	  if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
	    order[0] = i;
	}
      else
	/* Not a suitable memory address.  */
	return 0;
    }

  /* All the useful information has now been extracted from the
     operands into unsorted_regs and unsorted_offsets; additionally,
     order[0] has been set to the lowest offset in the list.  Sort
     the offsets into order, verifying that they are adjacent, and
     check that the register numbers are ascending.  */
  if (!compute_offset_order (nops, unsorted_offsets, order,
			     check_regs ? unsorted_regs : NULL))
    return 0;

  if (saved_order)
    memcpy (saved_order, order, sizeof order);

  if (base)
    {
      *base = base_reg;

      for (i = 0; i < nops; i++)
	{
	  regs[i] = unsorted_regs[check_regs ? order[i] : i];
	  if (reg_rtxs)
	    reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
	}

      *load_offset = unsorted_offsets[order[0]];
    }

  if (TARGET_THUMB1
      && !peep2_reg_dead_p (nops_total, base_reg_rtx))
    return 0;

  if (unsorted_offsets[order[0]] == 0)
    stm_case = 1; /* stmia */
  else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
    stm_case = 2; /* stmib */
  else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
    stm_case = 3; /* stmda */
  else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
    stm_case = 4; /* stmdb */
  else
    return 0;

  if (!multiple_operation_profitable_p (false, nops, 0))
    return 0;

  return stm_case;
}
/* Routines for use in generating RTL.  */

/* Generate a load-multiple instruction.  COUNT is the number of loads in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			 HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);

  return result;
}
/* Generate a store-multiple instruction.  COUNT is the number of stores in
   the instruction; REGS and MEMS are arrays containing the operands.
   BASEREG is the base register to be used in addressing the memory operands.
   WBACK_OFFSET is nonzero if the instruction should update the base
   register.  */

static rtx
arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
			  HOST_WIDE_INT wback_offset)
{
  int i = 0, j;
  rtx result;

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  if (!multiple_operation_profitable_p (false, count, 0))
    {
      rtx seq;

      start_sequence ();

      for (i = 0; i < count; i++)
	emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));

      if (wback_offset != 0)
	emit_move_insn (basereg, plus_constant (basereg, wback_offset));

      seq = get_insns ();
      end_sequence ();

      return seq;
    }

  result = gen_rtx_PARALLEL (VOIDmode,
			     rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
  if (wback_offset != 0)
    {
      XVECEXP (result, 0, 0)
	= gen_rtx_SET (VOIDmode, basereg,
		       plus_constant (basereg, wback_offset));
      i = 1;
      count++;
    }

  for (j = 0; i < count; i++, j++)
    XVECEXP (result, 0, i)
      = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));

  return result;
}
/* Generate either a load-multiple or a store-multiple instruction.  This
   function can be used in situations where we can start with a single MEM
   rtx and adjust its address upwards.
   COUNT is the number of operations in the instruction, not counting a
   possible update of the base register.  REGS is an array containing the
   register operands.
   BASEREG is the base register to be used in addressing the memory operands,
   which are constructed from BASEMEM.
   WRITE_BACK specifies whether the generated instruction should include an
   update of the base register.
   OFFSETP is used to pass an offset to and from this function; this offset
   is not used when constructing the address (instead BASEMEM should have an
   appropriate offset in its address), it is used only for setting
   MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */

static rtx
arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
		     bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
{
  rtx mems[MAX_LDM_STM_OPS];
  HOST_WIDE_INT offset = *offsetp;
  int i;

  gcc_assert (count <= MAX_LDM_STM_OPS);

  if (GET_CODE (basereg) == PLUS)
    basereg = XEXP (basereg, 0);

  for (i = 0; i < count; i++)
    {
      rtx addr = plus_constant (basereg, i * 4);
      mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
      offset += 4;
    }

  if (write_back)
    *offsetp = offset;

  if (is_load)
    return arm_gen_load_multiple_1 (count, regs, mems, basereg,
				    write_back ? 4 * count : 0);
  else
    return arm_gen_store_multiple_1 (count, regs, mems, basereg,
				     write_back ? 4 * count : 0);
}

rtx
arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
		       rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
			      offsetp);
}

rtx
arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
			rtx basemem, HOST_WIDE_INT *offsetp)
{
  return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
			      offsetp);
}
/* Called from a peephole2 expander to turn a sequence of loads into an
   LDM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate loads we are trying to combine.  SORT_REGS
   is true if we can reorder the registers because they are used commutatively
   subsequently.
   Returns true iff we could generate a new instruction.  */

bool
gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
{
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int i, j, base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int ldm_case;
  rtx addr;

  ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
				     &base_reg, &offset, !sort_regs);

  if (ldm_case == 0)
    return false;

  if (sort_regs)
    for (i = 0; i < nops - 1; i++)
      for (j = i + 1; j < nops; j++)
	if (regs[i] > regs[j])
	  {
	    int t = regs[i];
	    regs[i] = regs[j];
	    regs[j] = t;
	  }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  if (TARGET_THUMB1)
    {
      gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
      gcc_assert (ldm_case == 1 || ldm_case == 5);
      write_back = TRUE;
    }

  if (ldm_case == 5)
    {
      rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
      emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
      if (!TARGET_THUMB1)
	{
	  base_reg = regs[0];
	  base_reg_rtx = newbase;
	}
    }

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
				      write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores into an
   STM instruction.  OPERANDS are the operands found by the peephole matcher;
   NOPS indicates how many separate stores we are trying to combine.
   Returns true iff we could generate a new instruction.  */

bool
gen_stm_seq (rtx *operands, int nops)
{
  int i;
  int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;

  stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
				      mem_order, &base_reg, &offset, true);

  if (stm_case == 0)
    return false;

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
/* Called from a peephole2 expander to turn a sequence of stores that are
   preceded by constant loads into an STM instruction.  OPERANDS are the
   operands found by the peephole matcher; NOPS indicates how many
   separate stores we are trying to combine; there are 2 * NOPS
   instructions in the peephole.
   Returns true iff we could generate a new instruction.  */

bool
gen_const_stm_seq (rtx *operands, int nops)
{
  int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
  int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
  rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
  rtx mems[MAX_LDM_STM_OPS];
  int base_reg;
  rtx base_reg_rtx;
  HOST_WIDE_INT offset;
  int write_back = FALSE;
  int stm_case;
  rtx addr;
  bool base_reg_dies;
  int i, j;
  HARD_REG_SET allocated;

  stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
				      mem_order, &base_reg, &offset, false);

  if (stm_case == 0)
    return false;

  memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);

  /* If the same register is used more than once, try to find a free
     register.  */
  CLEAR_HARD_REG_SET (allocated);
  for (i = 0; i < nops; i++)
    {
      for (j = i + 1; j < nops; j++)
	if (regs[i] == regs[j])
	  {
	    rtx t = peep2_find_free_register (0, nops * 2,
					      TARGET_THUMB1 ? "l" : "r",
					      SImode, &allocated);
	    if (t == NULL_RTX)
	      return false;
	    reg_rtxs[i] = t;
	    regs[i] = REGNO (t);
	  }
    }

  /* Compute an ordering that maps the register numbers to an ascending
     sequence.  */
  reg_order[0] = 0;
  for (i = 0; i < nops; i++)
    if (regs[i] < regs[reg_order[0]])
      reg_order[0] = i;

  for (i = 1; i < nops; i++)
    {
      int this_order = reg_order[i - 1];
      for (j = 0; j < nops; j++)
	if (regs[j] > regs[reg_order[i - 1]]
	    && (this_order == reg_order[i - 1]
		|| regs[j] < regs[this_order]))
	  this_order = j;
      reg_order[i] = this_order;
    }

  /* Ensure that registers that must be live after the instruction end
     up with the correct value.  */
  for (i = 0; i < nops; i++)
    {
      int this_order = reg_order[i];
      if ((this_order != mem_order[i]
	   || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
	  && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
	return false;
    }

  /* Load the constants.  */
  for (i = 0; i < nops; i++)
    {
      rtx op = operands[2 * nops + mem_order[i]];
      sorted_regs[i] = regs[reg_order[i]];
      emit_move_insn (reg_rtxs[reg_order[i]], op);
    }

  base_reg_rtx = gen_rtx_REG (Pmode, base_reg);

  base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
  if (TARGET_THUMB1)
    {
      gcc_assert (base_reg_dies);
      write_back = TRUE;
    }

  if (stm_case == 5)
    {
      gcc_assert (base_reg_dies);
      emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
      offset = 0;
    }

  addr = plus_constant (base_reg_rtx, offset);

  for (i = 0; i < nops; i++)
    {
      addr = plus_constant (base_reg_rtx, offset + i * 4);
      mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
					      SImode, addr, 0);
    }
  emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
				       write_back ? offset + i * 4 : 0));
  return true;
}
int
arm_gen_movmemqi (rtx *operands)
{
  HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
  HOST_WIDE_INT srcoffset, dstoffset;
  int i;
  rtx src, dst, srcbase, dstbase;
  rtx part_bytes_reg = NULL;
  rtx mem;

  if (GET_CODE (operands[2]) != CONST_INT
      || GET_CODE (operands[3]) != CONST_INT
      || INTVAL (operands[2]) > 64
      || INTVAL (operands[3]) & 3)
    return 0;

  dstbase = operands[0];
  srcbase = operands[1];

  dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
  src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));

  in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
  out_words_to_go = INTVAL (operands[2]) / 4;
  last_bytes = INTVAL (operands[2]) & 3;
  dstoffset = srcoffset = 0;

  if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
    part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);

  for (i = 0; in_words_to_go >= 2; i+=4)
    {
      if (in_words_to_go > 4)
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
					  TRUE, srcbase, &srcoffset));
      else
	emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
					  src, FALSE, srcbase,
					  &srcoffset));

      if (out_words_to_go)
	{
	  if (out_words_to_go > 4)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
					       TRUE, dstbase, &dstoffset));
	  else if (out_words_to_go != 1)
	    emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
					       out_words_to_go, dst,
					       (last_bytes == 0
						? FALSE : TRUE),
					       dstbase, &dstoffset));
	  else
	    {
	      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
	      emit_move_insn (mem, gen_rtx_REG (SImode, 0));
	      if (last_bytes != 0)
		{
		  emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
		  dstoffset += 4;
		}
	    }
	}

      in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
      out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
    }

  /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do.  */
  if (out_words_to_go)
    {
      rtx sreg;

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      sreg = copy_to_reg (mem);

      mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
      emit_move_insn (mem, sreg);
      in_words_to_go--;

      gcc_assert (!in_words_to_go);	/* Sanity check */
    }

  if (in_words_to_go)
    {
      gcc_assert (in_words_to_go > 0);

      mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
      part_bytes_reg = copy_to_mode_reg (SImode, mem);
    }

  gcc_assert (!last_bytes || part_bytes_reg);

  if (BYTES_BIG_ENDIAN && last_bytes)
    {
      rtx tmp = gen_reg_rtx (SImode);

      /* The bytes we want are in the top end of the word.  */
      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
			      GEN_INT (8 * (4 - last_bytes))));
      part_bytes_reg = tmp;

      while (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode,
					   plus_constant (dst, last_bytes - 1),
					   dstoffset + last_bytes - 1);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));

	  if (--last_bytes)
	    {
	      tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
	      part_bytes_reg = tmp;
	    }
	}
    }
  else
    {
      if (last_bytes > 1)
	{
	  mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
	  last_bytes -= 2;
	  if (last_bytes)
	    {
	      rtx tmp = gen_reg_rtx (SImode);
	      emit_insn (gen_addsi3 (dst, dst, const2_rtx));
	      emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
	      part_bytes_reg = tmp;
	      dstoffset += 2;
	    }
	}

      if (last_bytes)
	{
	  mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
	  emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
	}
    }

  return 1;
}
/* Select a dominance comparison mode if possible for a test of the general
   form (OP (COND_OR (X) (Y)) (const_int 0)).  We support three forms.
   COND_OR == DOM_CC_X_AND_Y => (X && Y)
   COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
   COND_OR == DOM_CC_X_OR_Y => (X || Y)
   In all cases OP will be either EQ or NE, but we don't need to know which
   here.  If we are unable to support a dominance comparison we return
   CC mode.  This will then fail to match for the RTL expressions that
   generate this call.  */
enum machine_mode
arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
{
  enum rtx_code cond1, cond2;
  int swapped = 0;

  /* Currently we will probably get the wrong result if the individual
     comparisons are not simple.  This also ensures that it is safe to
     reverse a comparison if necessary.  */
  if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
       != CCmode)
      || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
	  != CCmode))
    return CCmode;

  /* The if_then_else variant of this tests the second condition if the
     first passes, but is true if the first fails.  Reverse the first
     condition to get a true "inclusive-or" expression.  */
  if (cond_or == DOM_CC_NX_OR_Y)
    cond1 = reverse_condition (cond1);

  /* If the comparisons are not equal, and one doesn't dominate the other,
     then we can't do this.  */
  if (cond1 != cond2
      && !comparison_dominates_p (cond1, cond2)
      && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
    return CCmode;

  if (swapped)
    {
      enum rtx_code temp = cond1;
      cond1 = cond2;
      cond2 = temp;
    }

  switch (cond1)
    {
    case EQ:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DEQmode;

      switch (cond2)
	{
	case EQ: return CC_DEQmode;
	case LE: return CC_DLEmode;
	case LEU: return CC_DLEUmode;
	case GE: return CC_DGEmode;
	case GEU: return CC_DGEUmode;
	default: gcc_unreachable ();
	}

    case LT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTmode;

      switch (cond2)
	{
	case LT:
	  return CC_DLTmode;
	case LE:
	  return CC_DLEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GT:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTmode;

      switch (cond2)
	{
	case GT:
	  return CC_DGTmode;
	case GE:
	  return CC_DGEmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case LTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DLTUmode;

      switch (cond2)
	{
	case LTU:
	  return CC_DLTUmode;
	case LEU:
	  return CC_DLEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    case GTU:
      if (cond_or == DOM_CC_X_AND_Y)
	return CC_DGTUmode;

      switch (cond2)
	{
	case GTU:
	  return CC_DGTUmode;
	case GEU:
	  return CC_DGEUmode;
	case NE:
	  return CC_DNEmode;
	default:
	  gcc_unreachable ();
	}

    /* The remaining cases only occur when both comparisons are the
       same.  */
    case NE:
      gcc_assert (cond1 == cond2);
      return CC_DNEmode;

    case LE:
      gcc_assert (cond1 == cond2);
      return CC_DLEmode;

    case GE:
      gcc_assert (cond1 == cond2);
      return CC_DGEmode;

    case LEU:
      gcc_assert (cond1 == cond2);
      return CC_DLEUmode;

    case GEU:
      gcc_assert (cond1 == cond2);
      return CC_DGEUmode;

    default:
      gcc_unreachable ();
    }
}
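
/* Illustrative sketch (not from the original source): for a source
   condition like "a == 0 && b == 2", combine can present
   (and (eq (reg a) (const_int 0)) (eq (reg b) (const_int 2))); with
   COND_OR == DOM_CC_X_AND_Y and both codes EQ this returns CC_DEQmode,
   letting the pair be emitted as a compare followed by a conditional
   compare.  */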
enum machine_mode
arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
	    return CCFPmode;
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  /* A compare with a shifted operand.  Because of canonicalization, the
     comparison will have to be swapped when we emit the assembler.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
	  || GET_CODE (x) == ROTATERT))
    return CC_SWPmode;

  /* This operation is performed swapped, but since we only rely on the Z
     flag we don't need an additional mode.  */
  if (GET_MODE (y) == SImode
      && (REG_P (y) || (GET_CODE (y) == SUBREG))
      && GET_CODE (x) == NEG
      && (op == EQ || op == NE))
    return CC_Zmode;

  /* This is a special case that is used by combine to allow a
     comparison of a shifted byte load to be split into a zero-extend
     followed by a comparison of the shifted integer (only valid for
     equalities and unsigned inequalities).  */
  if (GET_MODE (x) == SImode
      && GET_CODE (x) == ASHIFT
      && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
      && GET_CODE (XEXP (x, 0)) == SUBREG
      && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
      && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
      && (op == EQ || op == NE
	  || op == GEU || op == GTU || op == LTU || op == LEU)
      && GET_CODE (y) == CONST_INT)
    return CC_Zmode;

  /* A construct for a conditional compare, if the false arm contains
     0, then both conditions must be true, otherwise either condition
     must be true.  Not all conditions are possible, so CCmode is
     returned if it can't be done.  */
  if (GET_CODE (x) == IF_THEN_ELSE
      && (XEXP (x, 2) == const0_rtx
	  || XEXP (x, 2) == const1_rtx)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 INTVAL (XEXP (x, 2)));

  /* Alternate canonicalizations of the above.  These are somewhat cleaner.  */
  if (GET_CODE (x) == AND
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_AND_Y);

  if (GET_CODE (x) == IOR
      && (op == EQ || op == NE)
      && COMPARISON_P (XEXP (x, 0))
      && COMPARISON_P (XEXP (x, 1)))
    return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
					 DOM_CC_X_OR_Y);

  /* An operation (on Thumb) where we want to test for a single bit.
     This is done by shifting that bit up into the top bit of a
     scratch register; we can then branch on the sign bit.  */
  if (TARGET_THUMB1
      && GET_MODE (x) == SImode
      && (op == EQ || op == NE)
      && GET_CODE (x) == ZERO_EXTRACT
      && XEXP (x, 1) == const1_rtx)
    return CC_Nmode;

  /* An operation that sets the condition codes as a side-effect, the
     V flag is not set correctly, so we can only use comparisons where
     this doesn't matter.  (For LT and GE we can use "mi" and "pl"
     instead.)  */
  /* ??? Does the ZERO_EXTRACT case really apply to thumb2?  */
  if (GET_MODE (x) == SImode
      && y == const0_rtx
      && (op == EQ || op == NE || op == LT || op == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	  || GET_CODE (x) == AND || GET_CODE (x) == IOR
	  || GET_CODE (x) == XOR || GET_CODE (x) == MULT
	  || GET_CODE (x) == NOT || GET_CODE (x) == NEG
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == ROTATERT
	  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
    return CC_NOOVmode;

  if (GET_MODE (x) == QImode && (op == EQ || op == NE))
    return CC_Zmode;

  if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
      && GET_CODE (x) == PLUS
      && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
    return CC_Cmode;

  if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
    {
      /* To keep things simple, always use the Cirrus cfcmp64 if it is
	 available.  */
      if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
	return CCmode;

      switch (op)
	{
	case EQ:
	case NE:
	  /* A DImode comparison against zero can be implemented by
	     or'ing the two halves together.  */
	  if (y == const0_rtx)
	    return CC_Zmode;

	  /* We can do an equality test in three Thumb instructions.  */
	  if (!TARGET_ARM)
	    return CC_Zmode;

	  /* FALLTHROUGH */

	case LTU:
	case LEU:
	case GTU:
	case GEU:
	  /* DImode unsigned comparisons can be implemented by cmp +
	     cmpeq without a scratch register.  Not worth doing in
	     Thumb-2.  */
	  if (TARGET_ARM)
	    return CC_CZmode;

	  /* FALLTHROUGH */

	case LT:
	case LE:
	case GT:
	case GE:
	  /* DImode signed and unsigned comparisons can be implemented
	     by cmp + sbcs with a scratch register, but that does not
	     set the Z flag - we must reverse GT/LE/GTU/LEU.  */
	  gcc_assert (op != EQ && op != NE);
	  return CC_NCVmode;

	default:
	  gcc_unreachable ();
	}
    }

  return CCmode;
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */
static rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode;
  rtx cc_reg;
  int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;

  /* We might have X as a constant, Y as a register because of the predicates
     used for cmpdi.  If so, force X to a register here.  */
  if (dimode_comparison && !REG_P (x))
    x = force_reg (DImode, x);

  mode = SELECT_CC_MODE (code, x, y);
  cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  if (dimode_comparison
      && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
      && mode != CC_CZmode)
    {
      rtx clobber, set;

      /* To compare two non-zero values for equality, XOR them and
	 then compare against zero.  Not used for ARM mode; there
	 CC_CZmode is cheaper.  */
      if (mode == CC_Zmode && y != const0_rtx)
	{
	  x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
	  y = const0_rtx;
	}
      /* A scratch register is required.  */
      clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
      set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
    }
  else
    emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}
/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */
rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));
  return reg;
}
/* Handle loading a half-word from memory during reload by synthesizing
   it as two byte loads merged with a shift and an IOR.  */
void
arm_reload_in_hi (rtx *operands)
{
  rtx ref = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem[REGNO (ref)])
        {
          ref = reg_equiv_mem[REGNO (ref)];
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address[REGNO (ref)];
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  /* Operands[2] may overlap operands[0] (though it won't overlap
     operands[1]), that's why we asked for a DImode reg -- so we can
     use the bit that does not overlap.  */
  if (REGNO (operands[2]) == REGNO (operands[0]))
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
  else
    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  emit_insn (gen_zero_extendqisi2 (scratch,
                                   gen_rtx_MEM (QImode,
                                                plus_constant (base,
                                                               offset))));
  emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
                                   gen_rtx_MEM (QImode,
                                                plus_constant (base,
                                                               offset + 1))));
  if (!BYTES_BIG_ENDIAN)
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT
                                (SImode,
                                 gen_rtx_SUBREG (SImode, operands[0], 0),
                                 GEN_INT (8)),
                                scratch));
  else
    emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
                   gen_rtx_IOR (SImode,
                                gen_rtx_ASHIFT (SImode, scratch,
                                                GEN_INT (8)),
                                gen_rtx_SUBREG (SImode, operands[0], 0)));
}
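/* Worked example of the hi/lo offset split above (illustrative, not
   part of the original source).  For offset = 0x1234:

     lo = 0x1234 & 0xfff = 0x234
     hi = (((0x1234 - 0x234) & 0xffffffff) ^ 0x80000000) - 0x80000000
        = 0x1000

   so hi + lo == offset; the hi part is folded into the base register
   with a single add, leaving lo and lo + 1 inside the -4095..4095
   byte-access range noted in the comment.  In the corner case
   lo == 4095 the code trims lo to 0x7ff (and hi absorbs the rest) so
   that the "+ 1" of the second byte access cannot fall out of range.  */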
/* Handle storing a half-word to memory during reload by synthesizing as two
   byte stores.  Take care not to clobber the input values until after we
   have moved them somewhere safe.  This code assumes that if the DImode
   scratch in operands[2] overlaps either the input value or output address
   in some way, then that value must die in this insn (we absolutely need
   two scratch registers for some corner cases).  */
void
arm_reload_out_hi (rtx *operands)
{
  rtx ref = operands[0];
  rtx outval = operands[1];
  rtx base, scratch;
  HOST_WIDE_INT offset = 0;

  if (GET_CODE (ref) == SUBREG)
    {
      offset = SUBREG_BYTE (ref);
      ref = SUBREG_REG (ref);
    }

  if (GET_CODE (ref) == REG)
    {
      /* We have a pseudo which has been spilt onto the stack; there
         are two cases here: the first where there is a simple
         stack-slot replacement and a second where the stack-slot is
         out of range, or is used as a subreg.  */
      if (reg_equiv_mem[REGNO (ref)])
        {
          ref = reg_equiv_mem[REGNO (ref)];
          base = find_replacement (&XEXP (ref, 0));
        }
      else
        /* The slot is out of range, or was dressed up in a SUBREG.  */
        base = reg_equiv_address[REGNO (ref)];
    }
  else
    base = find_replacement (&XEXP (ref, 0));

  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));

  /* Handle the case where the address is too complex to be offset by 1.  */
  if (GET_CODE (base) == MINUS
      || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
    {
      rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

      /* Be careful not to destroy OUTVAL.  */
      if (reg_overlap_mentioned_p (base_plus, outval))
        {
          /* Updating base_plus might destroy outval, see if we can
             swap the scratch and base_plus.  */
          if (!reg_overlap_mentioned_p (scratch, outval))
            {
              rtx tmp = scratch;
              scratch = base_plus;
              base_plus = tmp;
            }
          else
            {
              rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

              /* Be conservative and copy OUTVAL into the scratch now,
                 this should only be necessary if outval is a subreg
                 of something larger than a word.  */
              /* XXX Might this clobber base?  I can't see how it can,
                 since scratch is known to overlap with OUTVAL, and
                 must be wider than a word.  */
              emit_insn (gen_movhi (scratch_hi, outval));
              outval = scratch_hi;
            }
        }

      emit_set_insn (base_plus, base);
      base = base_plus;
    }
  else if (GET_CODE (base) == PLUS)
    {
      /* The addend must be CONST_INT, or we would have dealt with it above.  */
      HOST_WIDE_INT hi, lo;

      offset += INTVAL (XEXP (base, 1));
      base = XEXP (base, 0);

      /* Rework the address into a legal sequence of insns.  */
      /* Valid range for lo is -4095 -> 4095 */
      lo = (offset >= 0
            ? (offset & 0xfff)
            : -((-offset) & 0xfff));

      /* Corner case, if lo is the max offset then we would be out of range
         once we have added the additional 1 below, so bump the msb into the
         pre-loading insn(s).  */
      if (lo == 4095)
        lo &= 0x7ff;

      hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
             ^ (HOST_WIDE_INT) 0x80000000)
            - (HOST_WIDE_INT) 0x80000000);

      gcc_assert (hi + lo == offset);

      if (hi != 0)
        {
          rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);

          /* Be careful not to destroy OUTVAL.  */
          if (reg_overlap_mentioned_p (base_plus, outval))
            {
              /* Updating base_plus might destroy outval, see if we
                 can swap the scratch and base_plus.  */
              if (!reg_overlap_mentioned_p (scratch, outval))
                {
                  rtx tmp = scratch;
                  scratch = base_plus;
                  base_plus = tmp;
                }
              else
                {
                  rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));

                  /* Be conservative and copy outval into scratch now,
                     this should only be necessary if outval is a
                     subreg of something larger than a word.  */
                  /* XXX Might this clobber base?  I can't see how it
                     can, since scratch is known to overlap with
                     outval.  */
                  emit_insn (gen_movhi (scratch_hi, outval));
                  outval = scratch_hi;
                }
            }

          /* Get the base address; addsi3 knows how to handle constants
             that require more than one insn.  */
          emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
          base = base_plus;
          offset = lo;
        }
    }

  if (BYTES_BIG_ENDIAN)
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (base, offset + 1)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
                            gen_lowpart (QImode, scratch)));
    }
  else
    {
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
                            gen_lowpart (QImode, outval)));
      emit_insn (gen_lshrsi3 (scratch,
                              gen_rtx_SUBREG (SImode, outval, 0),
                              GEN_INT (8)));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode,
                                         plus_constant (base, offset + 1)),
                            gen_lowpart (QImode, scratch)));
    }
}
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */
static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */
bool
arm_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;

  /* On big-endian AAPCS targets an integral value narrower than a word
     sits at the most significant end, so pad it downwards.  */
  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
   byte of the register has useful data, and return the opposite if the
   most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */
bool
arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
                    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED
      && BYTES_BIG_ENDIAN
      && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
      && int_size_in_bytes (type) <= 4)
    return true;

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Print a symbolic form of X to the debug file, F.  */
static void
arm_print_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
        int i;

        fprintf (f, "<");
        for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
          {
            fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
            if (i < (CONST_VECTOR_NUNITS (x) - 1))
              fputc (',', f);
          }
        fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      arm_print_value (f, XEXP (x, 0));
      return;

    case PLUS:
      arm_print_value (f, XEXP (x, 0));
      fprintf (f, "+");
      arm_print_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

        ldr     rn, L1
        ...
        b       L2
        align
        L1:     .long value
        L2:
        ...

        ldr     rn, L3
        ...
        b       L4
        align
        L3:     .long value
        L4:
        ...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a pool.  */

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */
struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
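/* For instance (illustrative): MINIPOOL_FIX_SIZE (HImode) is 4, a
   2-byte value padded out to a word slot; MINIPOOL_FIX_SIZE (SImode)
   is 4; and MINIPOOL_FIX_SIZE (DImode) is 8, which with iWMMXt also
   implies 8-byte alignment as noted above.  */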
static Mnode *  minipool_vector_head;
static Mnode *  minipool_vector_tail;
static rtx      minipool_vector_label;
static int      minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix *          minipool_fix_head;
Mfix *          minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *          minipool_barrier;
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */
static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (GET_CODE (insn) == JUMP_INSN
      && JUMP_LABEL (insn) != NULL
      && ((table = next_real_insn (JUMP_LABEL (insn)))
          == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
          || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
        {
        case 1:
          /* Round up size of TBB table to a halfword boundary.  */
          size = (size + 1) & ~(HOST_WIDE_INT)1;
          break;
        case 2:
          /* No padding necessary for TBH.  */
          break;
        case 4:
          /* Add two bytes for alignment on Thumb.  */
          if (TARGET_THUMB)
            size += 2;
          break;
        default:
          gcc_unreachable ();
        }
      return size;
    }

  return 0;
}
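/* Worked example (illustrative): an ADDR_DIFF_VEC of 5 entries in
   QImode (a TBB table) occupies 5 bytes, rounded up to 6 for the
   halfword boundary; the same 5 entries in HImode (TBH) take exactly
   10 bytes; in SImode they take 20 bytes, plus 2 bytes of alignment
   padding on Thumb.  */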
/* Move a minipool fix MP from its current location to before MAX_MP.
   If MAX_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  */
static Mnode *
move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
                               HOST_WIDE_INT max_address)
{
  /* The code below assumes these are different.  */
  gcc_assert (mp != max_mp);

  if (max_mp == NULL)
    {
      if (max_address < mp->max_address)
        mp->max_address = max_address;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      /* Unlink MP from its current position.  Since max_mp is non-null,
         mp->prev must be non-null.  */
      mp->prev->next = mp->next;
      if (mp->next != NULL)
        mp->next->prev = mp->prev;
      else
        minipool_vector_tail = mp->prev;

      /* Re-insert it before MAX_MP.  */
      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;

      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */
static Mnode *
add_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *       max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
  Mnode *       mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     can not be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head &&
      (fix->address + get_attr_length (fix->insn)
       >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (GET_CODE (fix->value) != CODE_LABEL
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value))
        {
          /* More than one fix references this entry.  */
          mp->refcount++;
          return move_minipool_fix_forward_ref (mp, max_mp, max_address);
        }

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL
          && mp->max_address > max_address)
        max_mp = mp;

      /* If we are inserting an 8-bytes aligned quantity and
         we have not already found an insertion point, then
         make sure that all such 8-byte aligned quantities are
         placed at the start of the pool.  */
      if (ARM_DOUBLEWORD_ALIGN
          && max_mp == NULL
          && fix->fix_size >= 8
          && mp->fix_size < 8)
        {
          max_mp = mp;
          max_address = mp->max_address;
        }
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
        {
          minipool_vector_head = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
        mp->max_address = max_mp->max_address - mp->fix_size;
      else
        mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
        mp->prev->next = mp;
      else
        minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
         && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}
static Mnode *
move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
                                HOST_WIDE_INT min_address)
{
  HOST_WIDE_INT offset;

  /* The code below assumes these are different.  */
  gcc_assert (mp != min_mp);

  if (min_mp == NULL)
    {
      if (min_address > mp->min_address)
        mp->min_address = min_address;
    }
  else
    {
      /* We will adjust this below if it is too loose.  */
      mp->min_address = min_address;

      /* Unlink MP from its current position.  Since min_mp is non-null,
         mp->next must be non-null.  */
      mp->next->prev = mp->prev;
      if (mp->prev != NULL)
        mp->prev->next = mp->next;
      else
        minipool_vector_head = mp->next;

      /* Reinsert it after MIN_MP.  */
      mp->prev = min_mp;
      mp->next = min_mp->next;
      min_mp->next = mp;
      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  min_mp = mp;

  offset = 0;
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;
      if (mp->refcount > 0)
        offset += mp->fix_size;

      if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;
    }

  return min_mp;
}
/* Add a constant to the minipool for a backward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.

   Note that the code for insertion for a backwards reference can be
   somewhat confusing because the calculated offsets for each fix do
   not take into account the size of the pool (which is still under
   construction).  */
static Mnode *
add_minipool_backward_ref (Mfix *fix)
{
  /* If set, min_mp is the last pool_entry that has a lower constraint
     than the one we are trying to add.  */
  Mnode *min_mp = NULL;
  /* This can be negative, since it is only a constraint.  */
  HOST_WIDE_INT min_address = fix->address - fix->backwards;
  Mnode *mp;

  /* If we can't reach the current pool from this insn, or if we can't
     insert this entry at the end of the pool without pushing other
     fixes out of range, then we don't try.  This ensures that we
     can't fail later on.  */
  if (min_address >= minipool_barrier->address
      || (minipool_vector_tail->min_address + fix->fix_size
          >= minipool_barrier->address))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
          && fix->mode == mp->mode
          && (GET_CODE (fix->value) != CODE_LABEL
              || (CODE_LABEL_NUMBER (fix->value)
                  == CODE_LABEL_NUMBER (mp->value)))
          && rtx_equal_p (fix->value, mp->value)
          /* Check that there is enough slack to move this entry to the
             end of the table (this is conservative).  */
          && (mp->max_address
              > (minipool_barrier->address
                 + minipool_vector_tail->offset
                 + minipool_vector_tail->fix_size)))
        {
          mp->refcount++;
          return move_minipool_fix_backward_ref (mp, min_mp, min_address);
        }

      if (min_mp != NULL)
        mp->min_address += fix->fix_size;
      else
        {
          /* Note the insertion point if necessary.  */
          if (mp->min_address < min_address)
            {
              /* For now, we do not allow the insertion of 8-byte alignment
                 requiring nodes anywhere but at the start of the pool.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                min_mp = mp;
            }
          else if (mp->max_address
                   < minipool_barrier->address + mp->offset + fix->fix_size)
            {
              /* Inserting before this entry would push the fix beyond
                 its maximum address (which can happen if we have
                 re-located a forwards fix); force the new fix to come
                 after it.  */
              if (ARM_DOUBLEWORD_ALIGN
                  && fix->fix_size >= 8 && mp->fix_size < 8)
                return NULL;
              else
                {
                  min_mp = mp;
                  min_address = mp->min_address + fix->fix_size;
                }
            }
          /* Do not insert a non-8-byte aligned quantity before 8-byte
             aligned quantities.  */
          else if (ARM_DOUBLEWORD_ALIGN
                   && fix->fix_size < 8
                   && mp->fix_size >= 8)
            {
              min_mp = mp;
              min_address = mp->min_address + fix->fix_size;
            }
        }
    }

  /* We need to create a new entry.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  mp->max_address = minipool_barrier->address + 65536;

  mp->min_address = min_address;

  if (min_mp == NULL)
    {
      mp->prev = NULL;
      mp->next = minipool_vector_head;

      if (mp->next == NULL)
        {
          minipool_vector_tail = mp;
          minipool_vector_label = gen_label_rtx ();
        }
      else
        mp->next->prev = mp;

      minipool_vector_head = mp;
    }
  else
    {
      mp->next = min_mp->next;
      mp->prev = min_mp;
      min_mp->next = mp;

      if (mp->next != NULL)
        mp->next->prev = mp;
      else
        minipool_vector_tail = mp;
    }

  /* Save the new entry.  */
  min_mp = mp;

  if (mp->prev)
    mp = mp->prev;
  else
    mp->offset = 0;

  /* Scan over the following entries and adjust their offsets.  */
  while (mp->next != NULL)
    {
      if (mp->next->min_address < mp->min_address + mp->fix_size)
        mp->next->min_address = mp->min_address + mp->fix_size;

      if (mp->refcount)
        mp->next->offset = mp->offset + mp->fix_size;
      else
        mp->next->offset = mp->offset;

      mp = mp->next;
    }

  return min_mp;
}
/* Fill in the offsets of the entries in the current minipool, and
   record BARRIER as the barrier after which the pool will be emitted.  */
static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
        offset += mp->fix_size;
    }
}
/* Output the literal table */
static void
dump_minipool (rtx scan)
{
  Mnode * mp;
  Mnode * nmp;
  int align64 = 0;

  if (ARM_DOUBLEWORD_ALIGN)
    for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
      if (mp->refcount > 0 && mp->fix_size >= 8)
        {
          align64 = 1;
          break;
        }

  if (dump_file)
    fprintf (dump_file,
             ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
             INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);

  scan = emit_label_after (gen_label_rtx (), scan);
  scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
  scan = emit_label_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
        {
          if (dump_file)
            {
              fprintf (dump_file,
                       ";;  Offset %u, min %ld, max %ld ",
                       (unsigned) mp->offset, (unsigned long) mp->min_address,
                       (unsigned long) mp->max_address);
              arm_print_value (dump_file, mp->value);
              fputc ('\n', dump_file);
            }

          switch (mp->fix_size)
            {
#ifdef HAVE_consttable_1
            case 1:
              scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_2
            case 2:
              scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_4
            case 4:
              scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_8
            case 8:
              scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
              break;
#endif
#ifdef HAVE_consttable_16
            case 16:
              scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
              break;
#endif
            default:
              gcc_unreachable ();
            }
        }

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_insn_after (gen_consttable_end (), scan);
  scan = emit_barrier_after (scan);
}
/* Return the cost of forcibly inserting a barrier after INSN.  */
static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupt by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
         than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
         within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
        {
          count += get_jump_table_size (tmp);

          /* Jump tables aren't in a basic block, so base the cost on
             the dispatch insn.  If we select this location, we will
             still put the pool after the table.  */
          new_cost = arm_barrier_cost (from);

          if (count < max_count
              && (!selected || new_cost <= selected_cost))
            {
              selected = tmp;
              selected_cost = new_cost;
              selected_address = fix->address + count;
            }

          /* Continue after the dispatch table.  */
          from = NEXT_INSN (tmp);
          continue;
        }

      new_cost = arm_barrier_cost (from);

      if (count < max_count
          && (!selected || new_cost <= selected_cost))
        {
          selected = from;
          selected_cost = new_cost;
          selected_address = fix->address + count;
        }

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
                   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
               ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
               GET_MODE_NAME (mode),
               INSN_UID (insn), (unsigned long) address,
               -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
                            NULL_RTX, NULL_RTX, 0, 0)
          + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
                              NULL_RTX, NULL_RTX, 0, 0));
}
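/* Example (illustrative, not from the original source): for the
   DImode constant 0x0000000100000001 both halves are 1, each
   synthesizable with a single mov, so the function should return
   1 + 1 = 2.  A value whose high part is not a valid ARM immediate,
   such as 0x12345678 in the upper word, costs more because that half
   needs several data-processing insns.  */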
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Return true if it is possible to inline both the high and low parts
   of a 64-bit constant into 32-bit data processing instructions.  */
bool
arm_const_double_by_immediates (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (!const_ok_for_arm (INTVAL (part)))
    return false;

  return true;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  bool result = false;
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return false;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
        continue;

      /* If this alternative is a memory reference, then any mention
         of constants in this alternative is really to fool reload
         into allowing us to accept one there.  We need to fix them up
         now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
        {
          rtx op = recog_data.operand[opno];

          if (CONSTANT_P (op))
            {
              if (do_pushes)
                push_minipool_fix (insn, address, recog_data.operand_loc[opno],
                                   recog_data.operand_mode[opno], op);
              result = true;
            }
          else if (GET_CODE (op) == MEM
                   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
                   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
            {
              if (do_pushes)
                {
                  rtx cop = avoid_constant_pool_reference (op);

                  /* Casting the address of something to a mode narrower
                     than a word can cause avoid_constant_pool_reference()
                     to return the pool reference itself.  That's no good to
                     us here.  Lets just hope that we can use the
                     constant pool value directly.  */
                  if (op == cop)
                    cop = get_pool_constant (XEXP (op, 0));

                  push_minipool_fix (insn, address,
                                     recog_data.operand_loc[opno],
                                     recog_data.operand_mode[opno], cop);
                }

              result = true;
            }
        }
    }

  return result;
}
/* Convert instructions to their cc-clobbering variant if possible, since
   that allows us to use smaller encodings.  */
static void
thumb2_reorg (void)
{
  basic_block bb;
  regset_head live;

  INIT_REG_SET (&live);

  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();
  df_analyze ();

  FOR_EACH_BB (bb)
    {
      rtx insn;

      COPY_REG_SET (&live, DF_LR_OUT (bb));
      df_simulate_initialize_backwards (bb, &live);
      FOR_BB_INSNS_REVERSE (bb, insn)
        {
          if (NONJUMP_INSN_P (insn)
              && !REGNO_REG_SET_P (&live, CC_REGNUM))
            {
              rtx pat = PATTERN (insn);
              if (GET_CODE (pat) == SET
                  && low_register_operand (XEXP (pat, 0), SImode)
                  && thumb_16bit_operator (XEXP (pat, 1), SImode)
                  && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
                  && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
                {
                  rtx dst = XEXP (pat, 0);
                  rtx src = XEXP (pat, 1);
                  rtx op0 = XEXP (src, 0);
                  rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
                             ? XEXP (src, 1) : NULL);

                  if (rtx_equal_p (dst, op0)
                      || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
                    {
                      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                      rtvec vec = gen_rtvec (2, pat, clobber);

                      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                      INSN_CODE (insn) = -1;
                    }
                  /* We can also handle a commutative operation where the
                     second operand matches the destination.  */
                  else if (op1 && rtx_equal_p (dst, op1))
                    {
                      rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
                      rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
                      rtvec vec;

                      src = copy_rtx (src);
                      XEXP (src, 0) = op1;
                      XEXP (src, 1) = op0;
                      pat = gen_rtx_SET (VOIDmode, dst, src);
                      vec = gen_rtvec (2, pat, clobber);
                      PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
                      INSN_CODE (insn) = -1;
                    }
                }
            }

          if (NONDEBUG_INSN_P (insn))
            df_simulate_one_insn_backwards (bb, insn, &live);
        }
    }

  CLEAR_REG_SET (&live);
}
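/* For example (illustrative): a pattern such as

     (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))

   is rewritten above into

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
                (clobber (reg:CC CC_REGNUM))])

   which lets output use the 16-bit flag-setting ADDS encoding rather
   than the 32-bit ADD, since the condition codes were known to be
   dead at that point.  */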
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  if (TARGET_THUMB2)
    thumb2_reorg ();

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (GET_CODE (insn) == NOTE);
  minipool_pad = 0;

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (TARGET_CIRRUS_FIX_INVALID_INSNS
          && (arm_cirrus_insn_p (insn)
              || GET_CODE (insn) == JUMP_INSN
              || arm_memory_load_p (insn)))
        cirrus_reorg (insn);

      if (GET_CODE (insn) == BARRIER)
        push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
        {
          rtx table;

          note_invalid_constants (insn, address, true);
          address += get_attr_length (insn);

          /* If the insn is a vector jump, add the size of the table
             and skip the table.  */
          if ((table = is_jump_table (insn)) != NULL)
            {
              address += get_jump_table_size (table);
              insn = table;
            }
        }
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix *  last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && GET_CODE (fix->insn) == BARRIER)
        fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
        break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
        {
          if (GET_CODE (ftmp->insn) == BARRIER)
            {
              if (ftmp->address >= minipool_vector_head->max_address)
                break;

              last_barrier = ftmp;
            }
          else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
            break;

          last_added_fix = ftmp;  /* Keep track of the last fix added.  */
        }

      /* If we found a barrier, drop back to that; any fixes that we
         could have reached but come after the barrier will now go in
         the next mini-pool.  */
      if (last_barrier != NULL)
        {
          /* Reduce the refcount for those fixes that won't go into this
             pool after all.  */
          for (fdel = last_barrier->next;
               fdel && fdel != ftmp;
               fdel = fdel->next)
            {
              fdel->minipool->refcount--;
              fdel->minipool = NULL;
            }

          ftmp = last_barrier;
        }
      else
        {
          /* ftmp is first fix that we can't fit into this pool and
             there no natural barriers that we could use.  Insert a
             new barrier in the code somewhere between the previous
             fix and this one, and arrange to jump around it.  */
          HOST_WIDE_INT max_address;

          /* The last item on the list of fixes must be a barrier, so
             we can never run off the end of the list of fixes without
             last_barrier being set.  */
          gcc_assert (ftmp);

          max_address = minipool_vector_head->max_address;
          /* Check that there isn't another fix that is in range that
             we couldn't fit into this pool because the pool was
             already too large: we need to put the pool before such an
             instruction.  The pool itself may come just after the
             fix because create_fix_barrier also allows space for a
             jump instruction.  */
          if (ftmp->address < max_address)
            max_address = ftmp->address + 1;

          last_barrier = create_fix_barrier (last_added_fix, max_address);
        }

      assign_minipool_offsets (last_barrier);

      while (ftmp)
        {
          if (GET_CODE (ftmp->insn) != BARRIER
              && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
                  == NULL))
            break;

          ftmp = ftmp->next;
        }

      /* Scan over the fixes we have identified for this pool, fixing them
         up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
           this_fix = this_fix->next)
        if (GET_CODE (this_fix->insn) != BARRIER)
          {
            rtx addr
              = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
                                                  minipool_vector_label),
                               this_fix->minipool->offset);
            *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
          }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
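/* Sketch of the overall effect (illustrative, not from the original
   source): an insn whose constant operand cannot be encoded as an
   immediate, conventionally written as

        ldr     r0, =0x12345678

   ends up as a pc-relative load from the minipool, emitted after a
   natural or synthesized barrier:

        ldr     r0, .LPOOL
        ...
        b       .LSKIP
   .LPOOL:
        .word   0x12345678
   .LSKIP:

   The label names here are hypothetical.  */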
/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}

/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}
/* Output the operands of a LDM/STM instruction to STREAM.
   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer,
   INSTR is the possibly suffixed load or store instruction.
   RFE is nonzero if the instruction should also copy spsr to cpsr.  */

static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
                 unsigned long mask, int rfe)
{
  unsigned i;
  bool not_first = FALSE;

  gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
  fputc ('\t', stream);
  asm_fprintf (stream, instr, reg);
  fputc ('{', stream);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      {
        if (not_first)
          fprintf (stream, ", ");

        asm_fprintf (stream, "%r", i);
        not_first = TRUE;
      }

  if (rfe)
    fprintf (stream, "}^\n");
  else
    fprintf (stream, "}\n");
}
/* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
   REG and COUNT specify the register range.
   Extra registers may be added to avoid hardware bugs.

   We output FLDMD even for ARMv5 VFP implementations.  Although
   FLDMD is technically not supported until ARMv6, it is believed
   that all VFP implementations support its use in this context.  */

static void
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
{
  int i;

  /* Workaround ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)
    {
      if (reg == 15)
        reg--;
      count++;
    }

  /* FLDMD may not load more than 16 doubleword registers at a time. Split the
     load into multiple parts if we have to handle more than 16 registers.  */
  if (count > 16)
    {
      vfp_output_fldmd (stream, base, reg, 16);
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
      return;
    }

  fputc ('\t', stream);
  asm_fprintf (stream, "fldmfdd\t%r!, {", base);

  for (i = reg; i < reg + count; i++)
    {
      if (i > reg)
        fputs (", ", stream);
      asm_fprintf (stream, "d%d", i);
    }

  fputs ("}\n", stream);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }

  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
        base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
         counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
                   gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (stack_pointer_rtx,
                                         - (count * 8)))
                    ),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                     plus_constant (stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
                     gen_frame_mem (DFmode, stack_pointer_rtx),
                     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
                         gen_frame_mem (DFmode,
                                        plus_constant (stack_pointer_rtx,
                                                       i * 8)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
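/* Example (illustrative, assuming the usual VFP register numbering
   where d8 is FIRST_VFP_REGNUM + 16): vfp_emit_fstmd
   (FIRST_VFP_REGNUM + 16, 4) pushes d8-d11, the equivalent of

        fstmfdd sp!, {d8, d9, d10, d11}

   and returns 4 * 8 = 32 bytes.  The DWARF SEQUENCE attached via
   REG_FRAME_RELATED_EXPR describes the same stores slot by slot, so
   the unwinder does not need to understand the multi-store itself.  */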
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
         first instruction.  It's safe to use IP as the target of the
         load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
        output_asm_insn ("bx%?\t%|ip", operands);
      else
        output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_long_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);

  return "";
}
/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_long_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
  return "";
}
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }
  else
    {
      for (i = 2; i >= 0; i--)
        {
          ops[0] = gen_rtx_REG (SImode, dest_start + i);
          ops[1] = gen_rtx_REG (SImode, src_start + i);
          output_asm_insn ("mov%?\t%0, %1", ops);
        }
    }

  return "";
}
/* Emit a movw/movt pair.  */
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
        emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
                                             GEN_INT (16)),
                       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }
  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
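/* Example (illustrative): for SRC = 0x12345678 the sequence emitted
   corresponds to

        movw    dest, #0x5678
        movt    dest, #0x1234

   the first set writing the low halfword and the zero_extract set
   patching the upper 16 bits without disturbing the lower ones.  For
   a value whose top halfword is zero, the movt is skipped entirely.  */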
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
  return "";
}
/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
  return "";
}
12910 /* Output a move between double words. It must be REG<-MEM
12913 output_move_double (rtx
*operands
)
12915 enum rtx_code code0
= GET_CODE (operands
[0]);
12916 enum rtx_code code1
= GET_CODE (operands
[1]);
12921 unsigned int reg0
= REGNO (operands
[0]);
12923 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
12925 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
12927 switch (GET_CODE (XEXP (operands
[1], 0)))
12931 && !(fix_cm3_ldrd
&& reg0
== REGNO(XEXP (operands
[1], 0))))
12932 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
12934 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
12938 gcc_assert (TARGET_LDRD
);
12939 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
12944 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
12946 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
12951 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
12953 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
12957 gcc_assert (TARGET_LDRD
);
12958 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
12963 /* Autoicrement addressing modes should never have overlapping
12964 base and destination registers, and overlapping index registers
12965 are already prohibited, so this doesn't need to worry about
12967 otherops
[0] = operands
[0];
12968 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
12969 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
12971 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
12973 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
12975 /* Registers overlap so split out the increment. */
12976 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
12977 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
12981 /* Use a single insn if we can.
12982 FIXME: IWMMXT allows offsets larger than ldrd can
12983 handle, fix these up with a pair of ldr. */
12985 || GET_CODE (otherops
[2]) != CONST_INT
12986 || (INTVAL (otherops
[2]) > -256
12987 && INTVAL (otherops
[2]) < 256))
12988 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
12991 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
12992 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
12998 /* Use a single insn if we can.
12999 FIXME: IWMMXT allows offsets larger than ldrd can handle,
13000 fix these up with a pair of ldr. */
13002 || GET_CODE (otherops
[2]) != CONST_INT
13003 || (INTVAL (otherops
[2]) > -256
13004 && INTVAL (otherops
[2]) < 256))
13005 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
13008 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops
);
13009 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
13016 /* We might be able to use ldrd %0, %1 here. However the range is
13017 different to ldr/adr, and it is broken on some ARMv7-M
13018 implementations. */
13019 /* Use the second register of the pair to avoid problematic
13021 otherops
[1] = operands
[1];
13022 output_asm_insn ("adr%?\t%0, %1", otherops
);
13023 operands
[1] = otherops
[0];
13025 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13027 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands
);
13030 /* ??? This needs checking for thumb2. */
13032 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
13033 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
13035 otherops
[0] = operands
[0];
13036 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
13037 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
13039 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
13041 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
13043 switch ((int) INTVAL (otherops
[2]))
13046 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
13051 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
13056 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
13060 otherops
[0] = gen_rtx_REG(SImode
, REGNO(operands
[0]) + 1);
13061 operands
[1] = otherops
[0];
13063 && (GET_CODE (otherops
[2]) == REG
13065 || (GET_CODE (otherops
[2]) == CONST_INT
13066 && INTVAL (otherops
[2]) > -256
13067 && INTVAL (otherops
[2]) < 256)))
13069 if (reg_overlap_mentioned_p (operands
[0],
13073 /* Swap base and index registers over to
13074 avoid a conflict. */
13076 otherops
[1] = otherops
[2];
13079 /* If both registers conflict, it will usually
13080 have been fixed by a splitter. */
13081 if (reg_overlap_mentioned_p (operands
[0], otherops
[2])
13082 || (fix_cm3_ldrd
&& reg0
== REGNO (otherops
[1])))
13084 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13085 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands
);
13089 otherops
[0] = operands
[0];
13090 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
13095 if (GET_CODE (otherops
[2]) == CONST_INT
)
13097 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
13098 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
13100 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13103 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
13106 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
13109 return "ldr%(d%)\t%0, [%1]";
13111 return "ldm%(ia%)\t%1, %M0";
13115 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
13116 /* Take care of overlapping base/data reg. */
13117 if (reg_mentioned_p (operands
[0], operands
[1]))
13119 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13120 output_asm_insn ("ldr%?\t%0, %1", operands
);
13124 output_asm_insn ("ldr%?\t%0, %1", operands
);
13125 output_asm_insn ("ldr%?\t%0, %1", otherops
);
13132 /* Constraints should ensure this. */
13133 gcc_assert (code0
== MEM
&& code1
== REG
);
13134 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          if (TARGET_LDRD)
            output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
          else
            output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        case PRE_INC:
          gcc_assert (TARGET_LDRD);
          output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
          break;

        case PRE_DEC:
          if (TARGET_LDRD)
            output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
          else
            output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
          break;

        case POST_INC:
          if (TARGET_LDRD)
            output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
          else
            output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
          break;

        case POST_DEC:
          gcc_assert (TARGET_LDRD);
          output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
          break;
        case PRE_MODIFY:
        case POST_MODIFY:
          otherops[0] = operands[1];
          otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
          otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

          /* IWMMXT allows offsets larger than ldrd can handle,
             fix these up with a pair of ldr.  */
          if (!TARGET_THUMB2
              && GET_CODE (otherops[2]) == CONST_INT
              && (INTVAL (otherops[2]) <= -256
                  || INTVAL (otherops[2]) >= 256))
            {
              if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
                {
                  output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
                  output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                }
              else
                {
                  output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
                  output_asm_insn ("str%?\t%0, [%1], %2", otherops);
                }
            }
          else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
            output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
          else
            output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
          break;
        case PLUS:
          otherops[2] = XEXP (XEXP (operands[0], 0), 1);
          if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
            {
              switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
                {
                case -8:
                  output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
                  return "";

                case -4:
                  if (TARGET_THUMB2)
                    break;
                  output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
                  return "";

                case 4:
                  if (TARGET_THUMB2)
                    break;
                  output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
                  return "";
                }
            }
          if (TARGET_LDRD
              && (GET_CODE (otherops[2]) == REG
                  || TARGET_THUMB2
                  || (GET_CODE (otherops[2]) == CONST_INT
                      && INTVAL (otherops[2]) > -256
                      && INTVAL (otherops[2]) < 256)))
            {
              otherops[0] = operands[1];
              otherops[1] = XEXP (XEXP (operands[0], 0), 0);
              output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
              return "";
            }
          /* Fall through */

        default:
          otherops[0] = adjust_address (operands[0], SImode, 4);
          otherops[1] = operands[1];
          output_asm_insn ("str%?\t%1, %0", operands);
          output_asm_insn ("str%?\t%H1, %0", otherops);
        }
    }

  return "";
}
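/* For illustration (register numbers assumed): a DImode load whose address
   is (plus r2 (const_int 8)) takes the PLUS path above and, when LDRD is
   available and the offset is in range, is emitted as a single ldrd from
   [r2, #8]; without LDRD the offset is first folded into the base with
   add/sub and the register pair is then fetched with ldmia.  */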
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
        {
          switch (GET_CODE (XEXP (operands[1], 0)))
            {
            case REG:
              output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
              break;

            case LABEL_REF:
            case CONST:
              output_asm_insn ("adr%?\t%0, %1", operands);
              output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else
        {
          rtx ops[2];
          int dest, src, i;

          gcc_assert (REG_P (operands[1]));

          dest = REGNO (operands[0]);
          src = REGNO (operands[1]);

          /* This seems pretty dumb, but hopefully GCC won't try to do it
             very often.  */
          if (dest < src)
            for (i = 0; i < 4; i++)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
          else
            for (i = 3; i >= 0; i--)
              {
                ops[0] = gen_rtx_REG (SImode, dest + i);
                ops[1] = gen_rtx_REG (SImode, src + i);
                output_asm_insn ("mov%?\t%0, %1", ops);
              }
        }
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
        {
        case REG:
          output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
          break;

        default:
          gcc_unreachable ();
        }
    }

  return "";
}
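/* For illustration: a reg->reg quad-word move from {r2-r5} into {r0-r3}
   overlaps in r2 and r3, so the copy must run low-to-high
   (mov r0, r2; mov r1, r3; ...); when the destination block is the higher
   numbered one the loop instead runs high-to-low, so no source register is
   clobbered before it has been read.  */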
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
              || mode == DFmode
              || mode == SImode
              || mode == DImode
              || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
           load ? "ld" : "st",
           dp ? 'd' : 's',
           dp ? "P" : "",
           integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
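/* For illustration (register numbers assumed): a DFmode load from [r0]
   matches the default case and prints as "fldd d0, [r0]", while a POST_INC
   address prints as "fldmiad r0!, {d0}"; the SFmode variants use the
   corresponding 's' forms.  */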
/* Output a Neon quad-word load or store, or a load or store for
   larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because we use VSTM, as required by the EABI.  GCC RTL defines
   element ordering based on in-memory order.  This can differ
   from the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.  */
const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
              || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
              || VALID_NEON_QREG_MODE (mode)
              || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      templ = "v%smia%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* FIXME: We should be using vld1/vst1 here in BE mode?  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
        int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
        int overlap = -1;
        int i;

        for (i = 0; i < nregs; i++)
          {
            /* We're only using DImode here because it's a convenient
               size.  */
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
            ops[1] = adjust_address (mem, DImode, 8 * i);
            if (reg_overlap_mentioned_p (ops[0], mem))
              {
                gcc_assert (overlap == -1);
                overlap = i;
              }
            else
              {
                sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
                output_asm_insn (buff, ops);
              }
          }
        if (overlap != -1)
          {
            ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
            ops[1] = adjust_address (mem, SImode, 8 * overlap);
            sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
            output_asm_insn (buff, ops);
          }

        return "";
      }

    default:
      templ = "v%smia%%?\t%%m0, %%h1";
      ops[0] = mem;
      ops[1] = reg;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
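/* For illustration: loading an OImode value (four D registers) from an
   address of the form (plus r1 ...) takes the LABEL_REF/PLUS path above and
   expands to four "vldr dN, [r1, #8*N]" loads, with any register that
   overlaps the address emitted last so the base is not clobbered early.  */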
/* Compute and return the length of neon_mov<mode>, where <mode> is
   one of VSTRUCT modes: EI, OI, CI or XI.  */
int
arm_attr_length_move_neon (rtx insn)
{
  rtx reg, mem, addr;
  int load;
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
        {
        case EImode:
        case OImode:
          return 8;
        case CImode:
          return 12;
        case XImode:
          return 16;
        default:
          gcc_unreachable ();
        }
    }

  load = REG_P (recog_data.operand[0]);
  reg = recog_data.operand[!load];
  mem = recog_data.operand[load];

  gcc_assert (MEM_P (mem));

  mode = GET_MODE (reg);
  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
    {
      int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
      return insns * 4;
    }
  else
    return 4;
}
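/* For illustration: an XImode (64-byte) access through a PLUS address needs
   eight vldr/vstr instructions, giving a length of 32 bytes, whereas a
   plain register-indirect access is a single 4-byte load/store
   multiple.  */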
/* Return nonzero if the offset in the address is an immediate.  Otherwise,
   return zero.  */
int
arm_address_offset_is_imm (rtx insn)
{
  rtx mem, addr;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]))
    return 0;

  mem = recog_data.operand[0];

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == REG
      || (GET_CODE (addr) == PLUS
          && GET_CODE (XEXP (addr, 0)) == REG
          && GET_CODE (XEXP (addr, 1)) == CONST_INT))
    return 1;
  else
    return 0;
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
        output_multi_immediate (operands,
                                "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
                                -n);
      else
        output_multi_immediate (operands,
                                "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
                                n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
                        int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
        {
          if (n & (3 << i))
            {
              operands[immed_op] = GEN_INT (n & (255 << i));
              output_asm_insn (instr, operands);
              instr = instr2;
              i += 6;
              n &= ~(255 << i);
            }
        }
    }

  return "";
}
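/* For illustration (operands r0/r1 assumed): output_add_immediate with
   n = 0x10004 emits "add r0, r1, #4" followed by "add r0, r0, #65536",
   since each step handles one 8-bit chunk at an even bit position, while
   n = -8 emits the single instruction "sub r0, r1, #8".  */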
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:
      return ARM_LSL_NAME;

    case ASHIFTRT:
      return "asr";

    case LSHIFTRT:
      return "lsr";

    case ROTATERT:
      return "ror";

    default:
      abort ();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:
      return "add";

    case MINUS:
      return shift_first_arg ? "rsb" : "sub";

    case IOR:
      return "orr";

    case XOR:
      return "eor";

    case AND:
      return "and";

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));

    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;
      code = ROTATERT;

      /* Fall through.  */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
         power of 2, since this case can never be reloaded from a reg.  */
      gcc_assert (*amountp != -1);
      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      gcc_unreachable ();
    }

  if (*amountp != -1)
    {
      /* This is not 100% correct, but follows from the desire to merge
         multiplication by a power of 2 with the recognizer for a
         shift.  >=32 is not a valid shift for "lsl", so we must try and
         output a shift that produces the correct arithmetical result.
         Using lsr #32 is identical except for the fact that the carry bit
         is not set correctly if we set the flags; but we never use the
         carry bit from such an operation, so we can ignore that.  */
      if (code == ROTATERT)
        /* Rotate is just modulo 32.  */
        *amountp &= 31;
      else if (*amountp != (*amountp & 31))
        {
          if (code == ASHIFT)
            mnem = "lsr";
          *amountp = 32;
        }

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
        return NULL;
    }

  return mnem;
}
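/* For illustration: for (mult x 8) shift_op returns ARM_LSL_NAME with
   *amountp set to int_log2 (8) == 3, so the operand prints as "lsl #3";
   a (rotate x 8) is rewritten as the equivalent "ror #24".  */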
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
        {
          fputs ("\"\n\t.ascii\t\"", stream);
          len_so_far = 0;
        }

      if (ISPRINT (c))
        {
          if (c == '\\' || c == '\"')
            {
              putc ('\\', stream);
              len_so_far++;
            }
          putc (c, stream);
          len_so_far++;
        }
      else
        {
          fprintf (stream, "\\%03o", c);
          len_so_far += 4;
        }
    }

  fputs ("\"\n", stream);
}
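/* For illustration: the bytes of "a\"b" followed by a newline are emitted
   as
	.ascii	"a\"b\012"
   and a fresh .ascii directive is started whenever MAX_ASCII_LEN characters
   have been written to the current one.  */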
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
         even call clobbered ones.  If this is a leaf function
         we can just examine the registers used by the RTL, but
         otherwise we have to assume that whatever function is
         called might clobber anything, and so we have to save
         all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
        /* FIQ handlers have registers r8 - r12 banked, so
           we only need to check r0 - r7.  Normal ISRs only
           bank r14 and r15, so we must check up to r12.
           r13 is the stack pointer which is always preserved,
           so we do not need to consider it here.  */
        max_reg = 7;
      else
        max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
        if (df_regs_ever_live_p (reg)
            || (! current_function_is_leaf && call_used_regs[reg]))
          save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && crtl->uses_pic_offset_table)
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE (func_type))
    {
      /* For noreturn functions we historically omitted register saves
         altogether.  However this really messes up debugging.  As a
         compromise save just the frame pointers.  Combined with the link
         register saved elsewhere this should be sufficient to get
         a backtrace.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
        save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
         which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
        if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
          save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
        save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
         don't stack it even though it may be live.  */
      if (flag_pic
          && !TARGET_SINGLE_PIC_BASE
          && arm_pic_register != INVALID_REGNUM
          && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
              || crtl->uses_pic_offset_table))
        save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
        save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
        {
          reg = EH_RETURN_DATA_REGNO (i);
          if (reg == INVALID_REGNUM)
            break;
          save_reg_mask |= 1 << reg;
        }
    }

  return save_reg_mask;
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int
arm_compute_static_chain_stack_bytes (void)
{
  unsigned long func_type = arm_current_func_type ();
  int static_chain_stack_bytes = 0;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
      && IS_NESTED (func_type)
      && df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
    static_chain_stack_bytes = 4;

  return static_chain_stack_bytes;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
          && optimize_size
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
           + ARM_NUM_INTS (crtl->args.pretend_args_size +
                           arm_compute_static_chain_stack_bytes ())
           ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
         onto the stack is odd.  We need to ensure that the stack
         is 64-bit aligned before we start to save iWMMXt registers,
         and also before we start to create locals.  (A local variable
         might be a double or long long which we will load/store using
         an iWMMXt instruction).  Therefore we need to push another
         ARM register, so that the stack will be 64-bit aligned.  We
         try to avoid using the arg registers (r0 -r3) as they might be
         used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
        if ((save_reg_mask & (1 << reg)) == 0)
          break;

      if (reg <= 12)
        save_reg_mask |= (1 << reg);
      else
        {
          cfun->machine->sibcall_blocked = 1;
          save_reg_mask |= (1 << 3);
        }
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
        save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
         we will use.  If the register is live then we will
         have to push it.  Use LAST_LO_REGNUM as our fallback
         choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
         not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
        reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
        mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
         determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
        if (mask & (1 << reg))
          break;

      if (reg > LAST_LO_REGNUM)
        {
          /* Make sure we have a register available for stack decrement.  */
          mask |= 1 << LAST_LO_REGNUM;
        }
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
           regno < LAST_VFP_REGNUM;
           regno += 2)
        {
          if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
              && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
            {
              if (count > 0)
                {
                  /* Workaround ARM10 VFPr1 bug.  */
                  if (count == 2 && !arm_arch6)
                    count++;
                  saved += count * 8;
                }
              count = 0;
            }
          else
            count++;
        }
      if (count > 0)
        {
          if (count == 2 && !arm_arch6)
            count++;
          saved += count * 8;
        }
    }
  return saved;
}
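/* For illustration: if the call-saved registers d8-d15 are all live, the
   scan above finds one run of eight D registers and returns 8 * 8 == 64
   bytes; a run of exactly two D registers on a pre-v6 core is padded to
   three (the ARM10 VFPr1 workaround) and accounts for 24 bytes.  */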
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  */
const char *
output_return_instruction (rtx operand, int really_return, int reverse)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
         found a tail call, then we have to trust that the called
         function won't return.  */
      if (really_return)
        {
          rtx ops[2];

          /* Otherwise, trap an attempted return by aborting.  */
          ops[0] = operand;
          ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
                                       : "abort");
          assemble_external_libcall (ops[1]);
          output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
        }

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
         (e.g. interworking) then we can load the return address
         directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
          && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
        return_reg = reg_names[PC_REGNUM];
      else
        return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
        {
          /* There are three possible reasons for the IP register
             being saved.  1) a stack frame was created, in which case
             IP contains the old stack pointer, or 2) an ISR routine
             corrupted it, or 3) it was saved to align the stack on
             iWMMXt.  In case 1, restore IP into SP, otherwise just
             restore IP.  */
          if (frame_pointer_needed)
            {
              live_regs_mask &= ~ (1 << IP_REGNUM);
              live_regs_mask |=   (1 << SP_REGNUM);
            }
          else
            gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
        }

      /* On some ARM architectures it is faster to use LDR rather than
         LDM to load a single register.  On other architectures, the
         cost is the same.  In 26 bit mode, or for exception handlers,
         we have to use LDM to load the PC so that the CPSR is also
         restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
        if (live_regs_mask == (1U << reg))
          break;

      if (reg <= LAST_ARM_REGNUM
          && (reg != LR_REGNUM
              || ! really_return
              || ! IS_INTERRUPT (func_type)))
        {
          sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
                   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
        }
      else
        {
          char *p;
          int first = 1;

          /* Generate the load multiple instruction to restore the
             registers.  Note we can get here, even if
             frame_pointer_needed is true, but only if sp already
             points to the base of the saved core registers.  */
          if (live_regs_mask & (1 << SP_REGNUM))
            {
              unsigned HOST_WIDE_INT stack_adjust;

              stack_adjust = offsets->outgoing_args - offsets->saved_regs;
              gcc_assert (stack_adjust == 0 || stack_adjust == 4);

              if (stack_adjust && arm_arch5 && TARGET_ARM)
                if (TARGET_UNIFIED_ASM)
                  sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
                else
                  sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
              else
                {
                  /* If we can't use ldmib (SA110 bug),
                     then try to pop r3 instead.  */
                  if (stack_adjust)
                    live_regs_mask |= 1 << 3;

                  if (TARGET_UNIFIED_ASM)
                    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
                  else
                    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
                }
            }
          else
            if (TARGET_UNIFIED_ASM)
              sprintf (instr, "pop%s\t{", conditional);
            else
              sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);

          p = instr + strlen (instr);

          for (reg = 0; reg <= SP_REGNUM; reg++)
            if (live_regs_mask & (1 << reg))
              {
                int l = strlen (reg_names[reg]);

                if (first)
                  first = 0;
                else
                  {
                    memcpy (p, ", ", 2);
                    p += 2;
                  }

                memcpy (p, "%|", 2);
                memcpy (p + 2, reg_names[reg], l);
                p += l + 2;
              }

          if (live_regs_mask & (1 << LR_REGNUM))
            {
              sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
              /* If returning from an interrupt, restore the CPSR.  */
              if (IS_INTERRUPT (func_type))
                strcat (p, "^");
            }
          else
            strcpy (p, "}");
        }

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
         perform the actual function return.  */
      if (really_return
          && func_type != ARM_FT_INTERWORKED
          && (live_regs_mask & (1 << LR_REGNUM)) != 0)
        {
          /* The return has already been handled
             by loading the LR into the PC.  */
          return "";
        }
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
        {
        case ARM_FT_ISR:
        case ARM_FT_FIQ:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
          break;

        case ARM_FT_INTERWORKED:
          sprintf (instr, "bx%s\t%%|lr", conditional);
          break;

        case ARM_FT_EXCEPTION:
          /* ??? This is wrong for unified assembly syntax.  */
          sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
          break;

        default:
          /* Use bx if it's available.  */
          if (arm_arch5 || arm_arch4t)
            sprintf (instr, "bx%s\t%%|lr", conditional);
          else
            sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
          break;
        }

      output_asm_insn (instr, & operand);
    }

  return "";
}
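/* For illustration: a function that pushed {r4, r5, lr} and has no special
   exit requirements restores everything with the single instruction
   "ldmfd sp!, {r4, r5, pc}" ("pop {r4, r5, pc}" under unified syntax);
   an interrupt handler that falls through to the switch above instead
   returns with "subs pc, lr, #4" so that the CPSR is restored as well.  */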
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
         .ascii "arm_poke_function_name", 0
         .align
     t1
         .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
         mov     ip, sp
         stmfd   sp!, {fp, ip, lr, pc}
         sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (TARGET_THUMB1)
    {
      thumb1_output_function_prologue (f, frame_size);
      return;
    }

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
               crtl->args.size,
               crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
               frame_pointer_needed,
               cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
const char *
arm_output_epilogue (rtx sibling)
{
  int reg;
  unsigned long saved_regs_mask;
  unsigned long func_type;
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
     frame that is $fp + 4 for a non-variadic function.  */
  int floats_offset = 0;
  rtx operands[3];
  FILE * f = asm_out_file;
  unsigned int lrm_count = 0;
  int really_return = (sibling == NULL);
  int start_reg;
  arm_stack_offsets *offsets;

  /* If we have already generated the return instruction
     then it is futile to generate anything else.  */
  if (use_return_insn (FALSE, sibling) &&
      (cfun->machine->return_used_this_function != 0))
    return "";

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    /* Naked functions don't have epilogues.  */
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      rtx op;

      /* A volatile function should never return.  Call abort.  */
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
      assemble_external_libcall (op);
      output_asm_insn ("bl\t%a0", &op);

      return "";
    }

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  if (TARGET_IWMMXT)
    lrm_count = bit_count (saved_regs_mask);

  floats_offset = offsets->saved_args;
  /* Compute how far away the floats will be.  */
  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
    if (saved_regs_mask & (1 << reg))
      floats_offset += 4;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
      int vfp_offset = offsets->frame;

      if (TARGET_FPA_EMU2)
        {
          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
              {
                floats_offset += 12;
                asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
                             reg, FP_REGNUM, floats_offset - vfp_offset);
              }
        }
      else
        {
          start_reg = LAST_FPA_REGNUM;

          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
            {
              if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
                {
                  floats_offset += 12;

                  /* We can't unstack more than four registers at once.  */
                  if (start_reg - reg == 3)
                    {
                      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
                                   reg, FP_REGNUM,
                                   floats_offset - vfp_offset);
                      start_reg = reg - 1;
                    }
                }
              else
                {
                  if (reg != start_reg)
                    asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
                                 reg + 1, start_reg - reg,
                                 FP_REGNUM, floats_offset - vfp_offset);
                  start_reg = reg - 1;
                }
            }

          /* Just in case the last register checked also needs unstacking.  */
          if (reg != start_reg)
            asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
                         reg + 1, start_reg - reg,
                         FP_REGNUM, floats_offset - vfp_offset);
        }

      if (TARGET_HARD_FLOAT && TARGET_VFP)
        {
          int saved_size;

          /* The fldmd insns do not have base+offset addressing
             modes, so we use IP to hold the address.  */
          saved_size = arm_get_vfp_saved_size ();

          if (saved_size > 0)
            {
              floats_offset += saved_size;
              asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
                           FP_REGNUM, floats_offset - vfp_offset);
            }
          start_reg = FIRST_VFP_REGNUM;
          for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
            {
              if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
                  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
                {
                  if (start_reg != reg)
                    vfp_output_fldmd (f, IP_REGNUM,
                                      (start_reg - FIRST_VFP_REGNUM) / 2,
                                      (reg - start_reg) / 2);
                  start_reg = reg + 2;
                }
            }
          if (start_reg != reg)
            vfp_output_fldmd (f, IP_REGNUM,
                              (start_reg - FIRST_VFP_REGNUM) / 2,
                              (reg - start_reg) / 2);
        }

      if (TARGET_IWMMXT)
        {
          /* The frame pointer is guaranteed to be non-double-word aligned.
             This is because it is set to (old_stack_pointer - 4) and the
             old_stack_pointer was double word aligned.  Thus the offset to
             the iWMMXt registers to be loaded must also be non-double-word
             sized, so that the resultant address *is* double-word aligned.
             We can ignore floats_offset since that was already included in
             the live_regs_mask.  */
          lrm_count += (lrm_count % 2 ? 2 : 1);

          for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
              {
                asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
                             reg, FP_REGNUM, lrm_count * 4);
                lrm_count += 2;
              }
        }

      /* saved_regs_mask should contain the IP, which at the time of stack
         frame generation actually contains the old stack pointer.  So a
         quick way to unwind the stack is just pop the IP register directly
         into the stack pointer.  */
      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
      saved_regs_mask &= ~ (1 << IP_REGNUM);
      saved_regs_mask |=   (1 << SP_REGNUM);

      /* There are two registers left in saved_regs_mask - LR and PC.  We
         only need to restore the LR register (the return address), but to
         save time we can load it directly into the PC, unless we need a
         special function exit sequence, or we are not really returning.  */
      if (really_return
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return)
        /* Delete the LR from the register mask, so that the LR on
           the stack is loaded into the PC in the register mask.  */
        saved_regs_mask &= ~ (1 << LR_REGNUM);
      else
        saved_regs_mask &= ~ (1 << PC_REGNUM);

      /* We must use SP as the base register, because SP is one of the
         registers being restored.  If an interrupt or page fault
         happens in the ldm instruction, the SP might or might not
         have been restored.  That would be bad, as then SP will no
         longer indicate the safe area of stack, and we can get stack
         corruption.  Using SP as the base register means that it will
         be reset correctly to the original value, should an interrupt
         occur.  If the stack pointer already points at the right
         place, then omit the subtraction.  */
      if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
          || cfun->calls_alloca)
        asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
                     4 * bit_count (saved_regs_mask));
      print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);

      if (IS_INTERRUPT (func_type))
        /* Interrupt handlers will have pushed the
           IP onto the stack, so restore it now.  */
        print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
    }
  else
    {
      /* This branch is executed for ARM mode (non-apcs frames) and
         Thumb-2 mode.  Frame layout is essentially the same for those
         cases, except that in ARM mode frame pointer points to the
         first saved register, while in Thumb-2 mode the frame pointer points
         to the last saved register.

         It is possible to make frame pointer point to last saved
         register in both cases, and remove some conditionals below.
         That means that fp setup in prologue would be just "mov fp, sp"
         and sp restore in epilogue would be just "mov sp, fp", whereas
         now we have to use add/sub in those cases.  However, the value
         of that would be marginal, as both mov and add/sub are 32-bit
         in ARM mode, and it would require extra conditionals
         in arm_expand_prologue to distinguish the ARM-apcs-frame case
         (where frame pointer is required to point at first register)
         and ARM-non-apcs-frame.  Therefore, such change is postponed
         until real need arise.  */
      unsigned HOST_WIDE_INT amount;
      int rfe;

      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM && frame_pointer_needed)
        {
          operands[0] = stack_pointer_rtx;
          operands[1] = hard_frame_pointer_rtx;

          operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
          output_add_immediate (operands);
        }
      else
        {
          if (frame_pointer_needed)
            {
              /* For Thumb-2 restore sp from the frame pointer.
                 Operand restrictions mean we have to increment FP, then copy
                 to SP.  */
              amount = offsets->locals_base - offsets->saved_regs;
              operands[0] = hard_frame_pointer_rtx;
            }
          else
            {
              unsigned long count;
              operands[0] = stack_pointer_rtx;
              amount = offsets->outgoing_args - offsets->saved_regs;
              /* pop call clobbered registers if it avoids a
                 separate stack adjustment.  */
              count = offsets->saved_regs - offsets->saved_args;
              if (optimize_size
                  && count != 0
                  && !crtl->calls_eh_return
                  && bit_count (saved_regs_mask) * 4 == count
                  && !IS_INTERRUPT (func_type)
                  && !crtl->tail_call_emit)
                {
                  unsigned long mask;
                  /* Preserve return values, of any size.  */
                  mask = (1 << ((arm_size_return_regs () + 3) / 4)) - 1;
                  mask ^= 0xf;
                  mask &= ~saved_regs_mask;
                  reg = 0;
                  while (bit_count (mask) * 4 > amount)
                    {
                      while ((mask & (1 << reg)) == 0)
                        reg++;
                      mask &= ~(1 << reg);
                    }
                  if (bit_count (mask) * 4 == amount)
                    {
                      amount = 0;
                      saved_regs_mask |= mask;
                    }
                }
            }

          if (amount)
            {
              operands[1] = operands[0];
              operands[2] = GEN_INT (amount);
              output_add_immediate (operands);
            }
          if (frame_pointer_needed)
            asm_fprintf (f, "\tmov\t%r, %r\n",
                         SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
        }

      if (TARGET_FPA_EMU2)
        {
          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
              asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
                           reg, SP_REGNUM);
        }
      else
        {
          start_reg = FIRST_FPA_REGNUM;

          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
            {
              if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
                {
                  if (reg - start_reg == 3)
                    {
                      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
                                   start_reg, SP_REGNUM);
                      start_reg = reg + 1;
                    }
                }
              else
                {
                  if (reg != start_reg)
                    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
                                 start_reg, reg - start_reg,
                                 SP_REGNUM);

                  start_reg = reg + 1;
                }
            }

          /* Just in case the last register checked also needs unstacking.  */
          if (reg != start_reg)
            asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
                         start_reg, reg - start_reg, SP_REGNUM);
        }

      if (TARGET_HARD_FLOAT && TARGET_VFP)
        {
          int end_reg = LAST_VFP_REGNUM + 1;

          /* Scan the registers in reverse order.  We need to match
             any groupings made in the prologue and generate matching
             pops.  */
          for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
            {
              if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
                  && (!df_regs_ever_live_p (reg + 1)
                      || call_used_regs[reg + 1]))
                {
                  if (end_reg > reg + 2)
                    vfp_output_fldmd (f, SP_REGNUM,
                                      (reg + 2 - FIRST_VFP_REGNUM) / 2,
                                      (end_reg - (reg + 2)) / 2);
                  end_reg = reg;
                }
            }
          if (end_reg > reg + 2)
            vfp_output_fldmd (f, SP_REGNUM, 0,
                              (end_reg - (reg + 2)) / 2);
        }

      if (TARGET_IWMMXT)
        for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
          if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
            asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);

      /* If we can, restore the LR into the PC.  */
      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_STACKALIGN (func_type)
          && really_return
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
        {
          saved_regs_mask &= ~ (1 << LR_REGNUM);
          saved_regs_mask |=   (1 << PC_REGNUM);
          rfe = IS_INTERRUPT (func_type);
        }
      else
        rfe = 0;

      /* Load the registers off the stack.  If we only have one register
         to load use the LDR instruction - it is faster.  For Thumb-2
         always use pop and the assembler will pick the best instruction.  */
      if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
          && !IS_INTERRUPT (func_type))
        {
          asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
        }
      else if (saved_regs_mask)
        {
          if (saved_regs_mask & (1 << SP_REGNUM))
            /* Note - write back to the stack register is not enabled
               (i.e. "ldmfd sp!...").  We know that the stack pointer is
               in the list of registers and if we add writeback the
               instruction becomes UNPREDICTABLE.  */
            print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
                             rfe);
          else if (TARGET_ARM)
            print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
                             rfe);
          else
            print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
        }

      if (crtl->args.pretend_args_size)
        {
          /* Unwind the pre-pushed regs.  */
          operands[0] = operands[1] = stack_pointer_rtx;
          operands[2] = GEN_INT (crtl->args.pretend_args_size);
          output_add_immediate (operands);
        }
    }

  /* We may have already restored PC directly from the stack.  */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
    return "";

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
                 ARM_EH_STACKADJ_REGNUM);

  /* Generate the return instruction.  */
  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    case ARM_FT_ISR:
    case ARM_FT_FIQ:
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      break;

    default:
      if (IS_STACKALIGN (func_type))
        {
          /* See comment in arm_expand_prologue.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
        }
      if (arm_arch5 || arm_arch4t)
        asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
      else
        asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
      break;
    }

  return "";
}
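/* For illustration: with an APCS frame whose mask has been reduced to
   {fp, sp, pc}, the code above typically emits
	sub	sp, fp, #12
	ldmfd	sp, {fp, sp, pc}
   restoring the caller's frame pointer, stack pointer and return address
   with a single load-multiple.  */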
static void
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
{
  arm_stack_offsets *offsets;

  if (TARGET_THUMB1)
    {
      int regno;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
        {
          rtx label = cfun->machine->call_via[regno];

          if (label != NULL)
            {
              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
            }
        }

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
    }
  else /* TARGET_32BIT */
    {
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
    }
}
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
static rtx
emit_multi_reg_push (unsigned long mask)
{
  int num_regs = 0;
  int num_dwarf_regs;
  int i, j;
  rtx par;
  rtx dwarf;
  int dwarf_par_index;
  rtx tmp, reg;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      num_regs++;

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))
    num_dwarf_regs--;

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (parallel [
           (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                        (const_int:SI <num>)))
                (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
           (use (reg:SI XX))
           (use (reg:SI YY))
           ...
        ])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
           (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
           (set (mem:SI (reg:SI sp)) (reg:SI r4))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
           (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
           ...
        ])

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date.  Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (VOIDmode,
                           gen_frame_mem
                           (BLKmode,
                            gen_rtx_PRE_MODIFY (Pmode,
                                                stack_pointer_rtx,
                                                plus_constant
                                                (stack_pointer_rtx,
                                                 -4 * num_regs))
                            ),
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (i != PC_REGNUM)
            {
              tmp = gen_rtx_SET (VOIDmode,
                                 gen_frame_mem (SImode, stack_pointer_rtx),
                                 reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
              dwarf_par_index++;
            }

          break;
        }
    }

  for (j = 1, i++; j < num_regs; i++)
    {
      if (mask & (1 << i))
        {
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (i != PC_REGNUM)
            {
              tmp
                = gen_rtx_SET (VOIDmode,
                               gen_frame_mem
                               (SImode,
                                plus_constant (stack_pointer_rtx,
                                               4 * j)),
                               reg);
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
            }

          j++;
        }
    }

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Calculate the size of the return value that is passed in registers.  */
static unsigned
arm_size_return_regs (void)
{
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
  else
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
}
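/* For illustration: a function returning a DImode value yields
   arm_size_return_regs () == 8, i.e. the result occupies r0 and r1.  */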
static rtx
emit_sfm (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (XFmode, base_reg++);

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
                   gen_frame_mem
                   (BLKmode,
                    gen_rtx_PRE_MODIFY (Pmode,
                                        stack_pointer_rtx,
                                        plus_constant
                                        (stack_pointer_rtx,
                                         -12 * count))
                    ),
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));
  tmp = gen_rtx_SET (VOIDmode,
                     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (XFmode, base_reg++);
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
                         gen_frame_mem (XFmode,
                                        plus_constant (stack_pointer_rtx,
                                                       i * 12)),
                         reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  tmp = gen_rtx_SET (VOIDmode,
                     stack_pointer_rtx,
                     plus_constant (stack_pointer_rtx, -12 * count));

  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);

  return par;
}
/* Return true if the current function needs to save/restore LR.  */
static bool
thumb_force_lr_save (void)
{
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
}
/* Return true if r3 is used by any of the tail call insns in the
   current function.  */
static bool
any_sibcall_uses_r3 (void)
{
  edge_iterator ei;
  edge e;

  if (!crtl->tail_call_emit)
    return false;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
    if (e->flags & EDGE_SIBCALL)
      {
        rtx call = BB_END (e->src);
        if (!CALL_P (call))
          call = prev_nonnote_nondebug_insn (call);
        gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
        if (find_regno_fusage (call, USE, 3))
          return true;
      }
  return false;
}
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
15269 /* Calculate stack offsets. These are used to calculate register elimination
15270 offsets and in prologue/epilogue code. Also calculates which registers
15271 should be saved. */
15273 static arm_stack_offsets
*
static arm_stack_offsets *
arm_get_frame_offsets (void)
{
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  int leaf;
  int saved;
  int core_saved;
  HOST_WIDE_INT frame_size;
  int i;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.

     Fortunately, there is no need
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */

  if (reload_completed)
    return offsets;

  /* Initially this is the size of the local variables.  It will be
     translated into an offset once we have determined the size of
     preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
                   arm_compute_static_chain_stack_bytes();

  if (TARGET_32BIT)
    {
      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
        {
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
               regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 8;
        }

      func_type = arm_current_func_type ();
      if (! IS_VOLATILE (func_type))
        {
          /* Space for saved FPA registers.  */
          for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
              saved += 12;

          /* Space for saved VFP registers.  */
          if (TARGET_HARD_FLOAT && TARGET_VFP)
            saved += arm_get_vfp_saved_size ();
        }
    }
  else /* TARGET_THUMB1 */
    {
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
        saved += 16;
    }

  /* Saved registers include the stack frame.  */
  offsets->saved_regs = offsets->saved_args + saved +
                        arm_compute_static_chain_stack_bytes();
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
    {
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;
      return offsets;
    }

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
    {
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
        {
          int reg = -1;

          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (! any_sibcall_uses_r3 ()
              && arm_size_return_regs () <= 12
              && (offsets->saved_regs_mask & (1 << 3)) == 0)
            reg = 3;
          else
            for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
              if ((offsets->saved_regs_mask & (1 << i)) == 0)
                {
                  reg = i;
                  break;
                }

          if (reg != -1)
            {
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);
            }
        }
    }

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
    {
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
    }

  return offsets;
}
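
/* An illustrative walk-through of the layout computed above, for a
   hypothetical ARM function that pushes {fp, lr}, has 8 bytes of locals,
   no pretend args, no static chain, no outgoing args, and a zero
   CALLER_INTERWORKING_SLOT_SIZE:

       saved_args    = 0
       frame         = 4   (frame pointer needed)
       saved_regs    = 8   (two core registers pushed)
       soft_frame    = 8
       locals_base   = 16
       outgoing_args = 16  (already doubleword aligned)  */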
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */

unsigned int
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return 0;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      gcc_unreachable ();

    default:
      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
    }
}
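
/* A worked example of the tables above, using the hypothetical layout
   sketched after arm_get_frame_offsets: eliminating ARG_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields outgoing_args - (saved_args + 4)
   = 16 - 4 = 12, i.e. the incoming arguments sit 12 bytes above the
   final stack pointer.  With nothing pushed at all the same formula
   gives -4 which, as the comment above notes, is correct.  */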
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */

static bool
arm_can_eliminate (const int from, const int to)
{
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
}
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */

static int
arm_save_coproc_regs(void)
{
  int saved_size = 0;
  unsigned reg;
  unsigned start_reg;
  rtx insn;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
      {
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;
        saved_size += 8;
      }

  /* Save any floating point call-saved registers used by this
     function.  */
  if (TARGET_FPA_EMU2)
    {
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
        if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
          {
            insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
            insn = gen_rtx_MEM (XFmode, insn);
            insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
            RTX_FRAME_RELATED_P (insn) = 1;
            saved_size += 12;
          }
    }
  else
    {
      start_reg = LAST_FPA_REGNUM;

      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
        {
          if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
            {
              if (start_reg - reg == 3)
                {
                  insn = emit_sfm (reg, 4);
                  RTX_FRAME_RELATED_P (insn) = 1;
                  saved_size += 48;
                  start_reg = reg - 1;
                }
            }
          else
            {
              if (start_reg != reg)
                {
                  insn = emit_sfm (reg + 1, start_reg - reg);
                  RTX_FRAME_RELATED_P (insn) = 1;
                  saved_size += (start_reg - reg) * 12;
                }
              start_reg = reg - 1;
            }
        }

      if (start_reg != reg)
        {
          insn = emit_sfm (reg + 1, start_reg - reg);
          saved_size += (start_reg - reg) * 12;
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
        {
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
            {
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
            }
        }
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
    }
  return saved_size;
}
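
/* An illustrative sketch of the FPA grouping logic above, for hypothetical
   register usage: if f4, f5, f6 and f7 are all live and call-saved, the
   downward scan reaches start_reg - reg == 3 and a single emit_sfm (reg, 4)
   stores the whole block of four; a lone live f6 is instead stored with a
   count of 1 by the emit_sfm (reg + 1, start_reg - reg) call when the scan
   moves past it.  Each FPA register stored this way accounts for 12 bytes
   (XFmode) of saved_size.  */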
/* Set the Thumb frame pointer from the stack pointer.  */

static void
thumb_set_frame_pointer (arm_stack_offsets *offsets)
{
  HOST_WIDE_INT amount;
  rtx insn, dwarf;

  amount = offsets->outgoing_args - offsets->locals_base;
  if (amount < 1024)
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
  else
    {
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
      if (TARGET_THUMB2)
        insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                      stack_pointer_rtx,
                                      hard_frame_pointer_rtx));
      else
        insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                      hard_frame_pointer_rtx,
                                      stack_pointer_rtx));

      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                           plus_constant (stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    }

  RTX_FRAME_RELATED_P (insn) = 1;
}
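
/* For small frames the code above boils down to a single instruction,
   e.g. (hypothetical offsets):

       add   r7, sp, #8      @ amount == 8

   while large offsets take the move-immediate-plus-add path so the add
   never needs an out-of-range immediate.  */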
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
void
arm_expand_prologue (void)
{
  rtx amount;
  rtx insn;
  rtx ip_rtx;
  unsigned long live_regs_mask;
  unsigned long func_type;
  int fp_offset = 0;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))
    return;

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which registers we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
    {
      rtx dwarf;
      rtx r0;
      rtx r1;

      /* Handle a word-aligned stack pointer.  We generate the following:

          mov r0, sp
          bic r1, r0, #7
          mov sp, r1
          <save and restore r0 in normal prologue/epilogue>
          mov sp, r0
          bx lr

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);
      /* Use a real rtvec rather than NULL_RTVEC so the rest of the
         compiler won't choke.  */
      dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
      dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
      insn = gen_movsi (r0, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      emit_insn (insn);
      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
    }

  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    {
      if (IS_INTERRUPT (func_type))
        {
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          insn = emit_multi_reg_push (1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
        }
      else if (IS_NESTED (func_type))
        {
          /* The Static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following
             places in order:

               1. The last argument register.
               2. A slot on the stack above the frame.  (This only
                  works if the function is not a varargs function).
               3. Register r3, after pushing the argument registers
                  onto the stack.

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */

          if (df_regs_ever_live_p (3) == false)
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
            {
              rtx dwarf;

              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              saved_regs += 4;

              insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
              fp_offset = 4;

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (stack_pointer_rtx,
                                                  -fp_offset));
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
            }
          else
            {
              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                insn = emit_multi_reg_push
                  ((0xf0 >> (args_to_push / 4)) & 0xf);
              else
                insn = emit_insn
                  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (- args_to_push)));

              RTX_FRAME_RELATED_P (insn) = 1;

              saved_pretend_args = 1;
              fp_offset = args_to_push;
              args_to_push = 0;

              /* Now reuse r3 to preserve IP.  */
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
            }
        }

      insn = emit_set_insn (ip_rtx,
                            plus_constant (stack_pointer_rtx, fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (args_to_push)
    {
      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf);
      else
        insn = emit_insn
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating the extra
     push of IP (needed when the frame is needed and the frame layout
     uses APCS), subtracting four from LR now will mean that the function
     return can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      && TARGET_ARM)
    {
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));
    }

  if (live_regs_mask)
    {
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
        {
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          int n;
          int frame;

          n = 0;
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
            n++;
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
            {
              n = frame / 4;
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
            }
        }
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
    {
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
        {
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
            {
              /* Recover the static chain register.  */
              if (!df_regs_ever_live_p (3)
                  || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else /* if (crtl->args.pretend_args_size == 0) */
                {
                  insn = plus_constant (hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
                }
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_prologue_use (ip_rtx));
            }
        }
      else
        {
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }

  if (flag_stack_usage)
    current_function_static_stack_size
      = offsets->outgoing_args - offsets->saved_args;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
    {
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                                    amount));
      do
        {
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
        }
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));
    }

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
    {
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);
    }

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (arm_except_unwind_info (&global_options) == UI_TARGET
          && cfun->can_throw_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
}
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
static void
arm_print_condition (FILE *stream)
{
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
    {
      /* Branch conversion is not implemented for Thumb-2.  */
      if (TARGET_THUMB)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }
      if (current_insn_predicate != NULL)
        {
          output_operand_lossage
            ("predicated instruction in conditional sequence");
          return;
        }

      fputs (arm_condition_codes[arm_current_cc], stream);
    }
  else if (current_insn_predicate)
    {
      enum arm_cond_code code;

      if (TARGET_THUMB1)
        {
          output_operand_lossage ("predicated Thumb instruction");
          return;
        }

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
    }
}
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   before output.
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
static void
arm_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case '@':
      fputs (ASM_COMMENT_START, stream);
      return;

    case '_':
      fputs (user_label_prefix, stream);
      return;

    case '|':
      fputs (REGISTER_PREFIX, stream);
      return;

    case '?':
      arm_print_condition (stream);
      return;

    case '(':
      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case ')':
      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
        arm_print_condition (stream);
      break;

    case '.':
      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
        {
          fputc('s', stream);
          arm_print_condition (stream);
        }
      else
        {
          arm_print_condition (stream);
          fputc('s', stream);
        }
      return;

    case '!':
      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
        arm_print_condition (stream);
      else
        fputc('s', stream);
      break;

    case '#':
      /* %# is a "break" sequence.  It doesn't output anything, but is used to
         separate e.g. operand numbers from following text, if that text
         consists of further digits which we don't want to be part of the
         operand number.  */
      return;

    case 'N':
      {
        REAL_VALUE_TYPE r;
        REAL_VALUE_FROM_CONST_DOUBLE (r, x);
        r = real_value_negate (&r);
        fprintf (stream, "%s", fp_const_from_val (&r));
      }
      return;

    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (stream, x);
          break;

        default:
          gcc_unreachable ();
        }
      return;

    case 'B':
      if (GET_CODE (x) == CONST_INT)
        {
          HOST_WIDE_INT val;
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
        }
      else
        {
          putc ('~', stream);
          output_addr_const (stream, x);
        }
      return;

    case 'L':
      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
      return;

    case 'i':
      fprintf (stream, "%s", arithmetic_instr (x, 1));
      return;

    /* Truncate Cirrus shift counts.  */
    case 's':
      if (GET_CODE (x) == CONST_INT)
        {
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
          return;
        }
      arm_print_operand (stream, x, 0);
      return;

    case 'I':
      fprintf (stream, "%s", arithmetic_instr (x, 0));
      return;

    case 'S':
      {
        HOST_WIDE_INT val;
        const char *shift;

        if (!shift_operator (x, SImode))
          {
            output_operand_lossage ("invalid shift operand");
            break;
          }

        shift = shift_op (x, &val);

        if (shift)
          {
            fprintf (stream, ", %s ", shift);
            if (val == -1)
              arm_print_operand (stream, XEXP (x, 1), 0);
            else
              fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
          }
      }
      return;

      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.

         The 'Q' and 'R' constraints are also available for 64-bit
         constants.  */
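
      /* For illustration: on a little-endian run, a DImode value held in
         the register pair {r0, r1} prints as r0 for 'Q' (least significant
         half), r1 for 'R' (most significant half) and r1 for 'H' (higher
         register number).  On a WORDS_BIG_ENDIAN run 'Q' and 'R' swap,
         while 'H' still prints r1.  */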
    case 'Q':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
        {
          rtx part = gen_lowpart (SImode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
      return;

    case 'R':
      if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
        {
          enum machine_mode mode = GET_MODE (x);
          rtx part;

          if (mode == VOIDmode)
            mode = DImode;
          part = gen_highpart_mode (SImode, mode, x);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
          return;
        }

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
      return;

    case 'H':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + 1);
      return;

    case 'J':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
      return;

    case 'K':
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
      return;

    case 'm':
      asm_fprintf (stream, "%r",
                   GET_CODE (XEXP (x, 0)) == REG
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
      return;

    case 'M':
      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x),
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      return;

    /* Like 'M', but writing doubleword vector registers, for use by Neon
       insns.  */
    case 'h':
      {
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
        if (numregs == 1)
          asm_fprintf (stream, "{d%d}", regno);
        else
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
      }
      return;

    case 'd':
      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[get_arm_condition_code (x)],
             stream);
      return;

    case 'D':
      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
        {
          output_operand_lossage ("instruction never executed");
          return;
        }
      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }

      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],
             stream);
      return;

    /* Cirrus registers can be accessed in a variety of ways:
         single floating point (f)
         double floating point (d)
         32bit integer         (fx)
         64bit integer         (dx).  */
    case 'W':			/* Cirrus register in F mode.  */
    case 'X':			/* Cirrus register in D mode.  */
    case 'Y':			/* Cirrus register in FX mode.  */
    case 'Z':			/* Cirrus register in DX mode.  */
      gcc_assert (GET_CODE (x) == REG
                  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);

      fprintf (stream, "mv%s%s",
               code == 'W' ? "f"
               : code == 'X' ? "d"
               : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);

      return;

    /* Print cirrus register in the mode specified by the register's mode.  */
    case 'V':
      {
        int mode = GET_MODE (x);

        if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "mv%s%s",
                 mode == DFmode ? "d"
                 : mode == SImode ? "fx"
                 : mode == DImode ? "dx"
                 : "f", reg_names[REGNO (x)] + 2);
      }
      return;

    case 'U':
      if (GET_CODE (x) != REG
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
      return;

    /* Print an iWMMXt control register name.  */
    case 'w':
      if (GET_CODE (x) != CONST_INT
          || INTVAL (x) < 0
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
        {
          output_operand_lossage ("invalid operand for code '%c'", code);
          return;
        }
      else
        {
          static const char * wc_reg_names [16] =
            {
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
            };

          fprintf (stream, wc_reg_names [INTVAL (x)]);
        }
      return;

    /* Print the high single-precision register of a VFP double-precision
       register.  */
    case 'p':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
      }
      return;

    /* Print a VFP/Neon double precision or quad precision register name.  */
    case 'P':
    case 'q':
      {
        int mode = GET_MODE (x);
        int is_quad = (code == 'q');
        int regno;

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_CODE (x) != REG
            || !IS_VFP_REGNUM (REGNO (x)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
      }
      return;

    /* These two codes print the low/high doubleword register of a Neon quad
       register, respectively.  For pair-structure types, can also print
       low/high quadword registers.  */
    case 'e':
    case 'f':
      {
        int mode = GET_MODE (x);
        int regno;

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                                  + (code == 'f' ? 1 : 0));
        else
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                                  + (code == 'f' ? 1 : 0));
      }
      return;

    /* Print a VFPv3 floating-point constant, represented as an integer
       index.  */
    case 'G':
      {
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);
      }
      return;

    /* Print bits representing opcode features for Neon.

       Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
       and polynomials as unsigned.

       Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

       Bit 2 is 1 for rounding functions, 0 otherwise.  */
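
    /* A worked example of the bit encoding above (hypothetical operand):
       a bits value of 5 (binary 101) is signed (bit 0) with rounding
       (bit 2), so '%T' prints 's' and '%O' prints 'r', producing e.g. a
       "vrshl.s32"-style mnemonic; a bits value of 2 selects 'p'
       (polynomial) from "uspf".  */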
    /* Identify the type as 's', 'u', 'p' or 'f'.  */
    case 'T':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("uspf"[bits & 3], stream);
      }
      return;

    /* Likewise, but signed and unsigned integers are both 'i'.  */
    case 'F':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("iipf"[bits & 3], stream);
      }
      return;

    /* As for 'T', but emit 'u' instead of 'p'.  */
    case 't':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputc ("usuf"[bits & 3], stream);
      }
      return;

    /* Bit 2: rounding (vs none).  */
    case 'O':
      {
        HOST_WIDE_INT bits = INTVAL (x);
        fputs ((bits & 4) != 0 ? "r" : "", stream);
      }
      return;

    /* Memory operand for vld1/vst1 instruction.  */
    case 'A':
      {
        rtx addr;
        bool postinc = FALSE;
        unsigned align, modesize, align_bits;

        gcc_assert (GET_CODE (x) == MEM);
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
          {
            postinc = 1;
            addr = XEXP (addr, 0);
          }
        asm_fprintf (stream, "[%r", REGNO (addr));

        /* We know the alignment of this access, so we can emit a hint in the
           instruction (for some alignments) as an aid to the memory subsystem
           of the target.  */
        align = MEM_ALIGN (x) >> 3;
        modesize = GET_MODE_SIZE (GET_MODE (x));

        /* Only certain alignment specifiers are supported by the hardware.  */
        if (modesize == 16 && (align % 32) == 0)
          align_bits = 256;
        else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
          align_bits = 128;
        else if ((align % 8) == 0)
          align_bits = 64;
        else
          align_bits = 0;

        if (align_bits != 0)
          asm_fprintf (stream, ":%d", align_bits);

        asm_fprintf (stream, "]");

        if (postinc)
          fputs("!", stream);
      }
      return;
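
    /* The hint syntax emitted above looks like (hypothetical operands):

           vld1.32  {d0, d1}, [r0:128]!

       i.e. a 16-byte access whose known alignment is a multiple of 16
       bytes gets a ":128" hint, and the trailing "!" appears for
       POST_INC addresses.  */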
    case 'C':
      {
        rtx addr;

        gcc_assert (GET_CODE (x) == MEM);
        addr = XEXP (x, 0);
        gcc_assert (GET_CODE (addr) == REG);
        asm_fprintf (stream, "[%r]", REGNO (addr));
      }
      return;

    /* Translate an S register number into a D register number and element
       index.  */
    case 'y':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
      }
      return;

    /* Register specifier for vld1.16/vst1.16.  Translate the S register
       number into a D register number and element index.  */
    case 'z':
      {
        int mode = GET_MODE (x);
        int regno;

        if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = REGNO (x);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
          {
            output_operand_lossage ("invalid operand for code '%c'", code);
            return;
          }

        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
      }
      return;

    default:
      if (x == 0)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (stream, "%r", REGNO (x));
          break;

        case MEM:
          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case CONST_DOUBLE:
          if (TARGET_NEON)
            {
              char fpstr[20];
              real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                               sizeof (fpstr), 0, 1);
              fprintf (stream, "#%s", fpstr);
            }
          else
            fprintf (stream, "#%s", fp_immediate_constant (x));
          break;

        default:
          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
            {
              fputs (":lower16:", stream);
              x = XEXP (x, 0);
            }

          output_addr_const (stream, x);
          break;
        }
    }
}
/* Target hook for printing a memory address.  */
static void
arm_print_operand_address (FILE *stream, rtx x)
{
  if (TARGET_32BIT)
    {
      int is_minus = GET_CODE (x) == MINUS;

      if (GET_CODE (x) == REG)
        asm_fprintf (stream, "[%r, #0]", REGNO (x));
      else if (GET_CODE (x) == PLUS || is_minus)
        {
          rtx base = XEXP (x, 0);
          rtx index = XEXP (x, 1);
          HOST_WIDE_INT offset = 0;
          if (GET_CODE (base) != REG
              || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
            {
              /* Ensure that BASE is a register.  (One of them must be.)
                 Also ensure the SP is not used as an index register.  */
              rtx temp = base;
              base = index;
              index = temp;
            }
          switch (GET_CODE (index))
            {
            case CONST_INT:
              offset = INTVAL (index);
              if (is_minus)
                offset = -offset;
              asm_fprintf (stream, "[%r, #%wd]",
                           REGNO (base), offset);
              break;

            case REG:
              asm_fprintf (stream, "[%r, %s%r]",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (index));
              break;

            case MULT:
            case ASHIFTRT:
            case LSHIFTRT:
            case ASHIFT:
            case ROTATERT:
              asm_fprintf (stream, "[%r, %s%r",
                           REGNO (base), is_minus ? "-" : "",
                           REGNO (XEXP (index, 0)));
              arm_print_operand (stream, index, 'S');
              fputs ("]", stream);
              break;

            default:
              gcc_unreachable ();
            }
        }
      else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
               || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
        {
          extern enum machine_mode output_memory_reference_mode;

          gcc_assert (GET_CODE (XEXP (x, 0)) == REG);

          if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
            asm_fprintf (stream, "[%r, #%s%d]!",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == PRE_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
          else
            asm_fprintf (stream, "[%r], #%s%d",
                         REGNO (XEXP (x, 0)),
                         GET_CODE (x) == POST_DEC ? "-" : "",
                         GET_MODE_SIZE (output_memory_reference_mode));
        }
      else if (GET_CODE (x) == PRE_MODIFY)
        {
          asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
          if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
            asm_fprintf (stream, "#%wd]!",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r]!",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else if (GET_CODE (x) == POST_MODIFY)
        {
          asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
          if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
            asm_fprintf (stream, "#%wd",
                         INTVAL (XEXP (XEXP (x, 1), 1)));
          else
            asm_fprintf (stream, "%r",
                         REGNO (XEXP (XEXP (x, 1), 1)));
        }
      else
        output_addr_const (stream, x);
    }
  else
    {
      if (GET_CODE (x) == REG)
        asm_fprintf (stream, "[%r]", REGNO (x));
      else if (GET_CODE (x) == POST_INC)
        asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
      else if (GET_CODE (x) == PLUS)
        {
          gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            asm_fprintf (stream, "[%r, #%wd]",
                         REGNO (XEXP (x, 0)),
                         INTVAL (XEXP (x, 1)));
          else
            asm_fprintf (stream, "[%r, %r]",
                         REGNO (XEXP (x, 0)),
                         REGNO (XEXP (x, 1)));
        }
      else
        output_addr_const (stream, x);
    }
}
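
/* For reference, the 32-bit forms printed above look like (hypothetical
   registers and offsets):

       [r3, #8]      REG plus CONST_INT
       [r3, -r4]     REG minus REG
       [r3, #-4]!    PRE_DEC with writeback
       [r3], #4      POST_INC
       [r3, r4]!     PRE_MODIFY with a register index

   The Thumb branch uses the simpler "[rN]", "[rN, #off]" and "[rN, rM]"
   variants.  */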
/* Target hook for indicating whether a punctuation character for
   TARGET_PRINT_OPERAND is valid.  */
static bool
arm_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '@' || code == '|' || code == '.'
          || code == '(' || code == ')' || code == '#'
          || (TARGET_32BIT && (code == '?'))
          || (TARGET_THUMB2 && (code == '!'))
          || (TARGET_THUMB && (code == '_')));
}
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
         .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
          (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
        {
          /* See legitimize_pic_address for an explanation of the
             TARGET_VXWORKS_RTP check.  */
          if (TARGET_VXWORKS_RTP
              || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
            fputs ("(GOT)", asm_out_file);
          else
            fputs ("(GOTOFF)", asm_out_file);
        }
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            assemble_integer
              (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
          }
      else
        for (i = 0; i < units; i++)
          {
            rtx elt = CONST_VECTOR_ELT (x, i);
            REAL_VALUE_TYPE rval;

            REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

            assemble_real
              (rval, GET_MODE_INNER (mode),
               i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
          }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
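
/* The PIC paths above produce directives like (hypothetical symbols):

       .word   foo(GOT)        @ global symbol, or any symbol on VxWorks RTP
       .word   bar(GOTOFF)     @ symbol known to be local

   while non-word-sized and vector values take the element-by-element and
   default paths instead.  */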
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
               is_ctor ? ".init_array" : ".fini_array",
               priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else
    /* Default priority: use the plain section.  (The exact lookup here is
       a reconstruction; the extracted source had lost these lines.)  */
    s = get_section (is_ctor ? ".init_array" : ".fini_array",
                     SECTION_WRITE, NULL_TREE);

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
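
/* For a constructor with (hypothetical) priority 123 the code above
   switches to a section named ".init_array.00123" and emits

       .word   ctor_fn(target1)

   so the linker can order prioritized init entries by section name.  */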
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
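
/* As an illustrative (hypothetical) example of what the fsm achieves,
   a sequence such as

       cmp   r0, #0
       beq   .L1
       add   r1, r1, #1
   .L1:

   can be rewritten by suppressing the branch (states 1/3) and
   conditionalising the skipped instruction:

       cmp   r0, #0
       addne r1, r1, #1  */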
/* Returns the index of the ARM condition code string in
   `arm_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
                           XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);

      if (comp_code == EQ)
        return ARM_INVERSE_CONDITION_CODE (code);
      return code;

    case CC_NOOVmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_PL;
        case LT: return ARM_MI;
        default: gcc_unreachable ();
        }

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        default: gcc_unreachable ();
        }

    case CC_Nmode:
      switch (comp_code)
        {
        case NE: return ARM_MI;
        case EQ: return ARM_PL;
        default: gcc_unreachable ();
        }

    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
         byte.  This allows us to handle all cases except UNEQ and
         LTGT.  */
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LS;
        case LT: return ARM_MI;
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case ORDERED: return ARM_VC;
        case UNORDERED: return ARM_VS;
        case UNLT: return ARM_LT;
        case UNLE: return ARM_LE;
        case UNGT: return ARM_HI;
        case UNGE: return ARM_PL;
        /* UNEQ and LTGT do not have a representation.  */
        case UNEQ: /* Fall through.  */
        case LTGT: /* Fall through.  */
        default: gcc_unreachable ();
        }

    case CC_SWPmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_LE;
        case GT: return ARM_LT;
        case LE: return ARM_GE;
        case LT: return ARM_GT;
        case GEU: return ARM_LS;
        case GTU: return ARM_CC;
        case LEU: return ARM_CS;
        case LTU: return ARM_HI;
        default: gcc_unreachable ();
        }

    case CC_Cmode:
      switch (comp_code)
        {
        case LTU: return ARM_CS;
        case GEU: return ARM_CC;
        default: gcc_unreachable ();
        }

    case CC_CZmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: gcc_unreachable ();
        }

    case CC_NCVmode:
      switch (comp_code)
        {
        case GE: return ARM_GE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case LTU: return ARM_CC;
        default: gcc_unreachable ();
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return ARM_NE;
        case EQ: return ARM_EQ;
        case GE: return ARM_GE;
        case GT: return ARM_GT;
        case LE: return ARM_LE;
        case LT: return ARM_LT;
        case GEU: return ARM_CS;
        case GTU: return ARM_HI;
        case LEU: return ARM_LS;
        case LTU: return ARM_CC;
        default: gcc_unreachable ();
        }

    default: gcc_unreachable ();
    }
}
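
/* Two illustrative mappings from the tables above: in CC_NOOVmode
   (overflow known to be clear) a GE comparison maps to ARM_PL, i.e.
   only the N flag is tested; in CC_SWPmode (operands compared in
   reverse order) GT maps to ARM_LT, undoing the swap.  */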
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (GET_CODE (insn) == JUMP_INSN)
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
         barrier terminates the block.  */
      if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
        break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
          || GET_CODE (body) == CLOBBER)
        continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
        break;
      /* Allow up to 4 conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
        break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
        arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
        break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (GET_CODE(insn) == JUMP_INSN)
        break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
        {
          arm_target_insn = NULL;
          arm_ccfsm_state = 0;
        }
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
        {
          start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == BARRIER)
            {
              /* XXX Isn't this always a barrier?  */
              start_insn = next_nonnote_insn (start_insn);
            }
          if (GET_CODE (start_insn) == CODE_LABEL
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            reverse = TRUE;
          else
            return;
        }
      else if (GET_CODE (body) == RETURN)
        {
          start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == BARRIER)
            start_insn = next_nonnote_insn (start_insn);
          if (GET_CODE (start_insn) == CODE_LABEL
              && CODE_LABEL_NUMBER (start_insn) == arm_target_label
              && LABEL_NUSES (start_insn) == 1)
            {
              reverse = TRUE;
              seeking_return = 1;
            }
          else
            return;
        }
      else
        return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
          && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
        {
          if (!seeking_return)
            label = XEXP (SET_SRC (body), 0);
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
        label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
        {
          label = XEXP (XEXP (SET_SRC (body), 2), 0);
          then_not_else = FALSE;
        }
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
        seeking_return = 1;
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
        {
          seeking_return = 1;
          then_not_else = FALSE;
        }
      else
        gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
         insns are okay, and the label or unconditional branch to the same
         label is not too far away, succeed.  */
      for (insns_skipped = 0;
           !fail && !succeed && insns_skipped++ < max_insns_skipped;)
        {
          rtx scanbody;

          this_insn = next_nonnote_insn (this_insn);
          if (!this_insn)
            break;

          switch (GET_CODE (this_insn))
            {
            case CODE_LABEL:
              /* Succeed if it is the target label, otherwise fail since
                 control falls in from somewhere else.  */
              if (this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case BARRIER:
              /* Succeed if the following insn is the target label.
                 Otherwise fail.
                 If return insns are used then the last insn in a function
                 will be a barrier.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && this_insn == label)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case CALL_INSN:
              /* The AAPCS says that conditional calls should not be
                 used since they make interworking inefficient (the
                 linker can't transform BL<cond> into BLX).  That's
                 only a problem if the machine has BLX.  */
              if (arm_arch5)
                {
                  fail = TRUE;
                  break;
                }

              /* Succeed if the following insn is the target label, or
                 if the following two insns are a barrier and the
                 target label.  */
              this_insn = next_nonnote_insn (this_insn);
              if (this_insn && GET_CODE (this_insn) == BARRIER)
                this_insn = next_nonnote_insn (this_insn);

              if (this_insn && this_insn == label
                  && insns_skipped < max_insns_skipped)
                {
                  arm_ccfsm_state = 1;
                  succeed = TRUE;
                }
              else
                fail = TRUE;
              break;

            case JUMP_INSN:
              /* If this is an unconditional branch to the same label, succeed.
                 If it is to another label, do nothing.  If it is conditional,
                 fail.  */
              /* XXX Probably, the tests for SET and the PC are
                 unnecessary.  */

              scanbody = PATTERN (this_insn);
              if (GET_CODE (scanbody) == SET
                  && GET_CODE (SET_DEST (scanbody)) == PC)
                {
                  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
                      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
                    {
                      arm_ccfsm_state = 2;
                      succeed = TRUE;
                    }
                  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
                    fail = TRUE;
                }
              /* Fail if a conditional return is undesirable (e.g. on a
                 StrongARM), but still allow this if optimizing for size.  */
              else if (GET_CODE (scanbody) == RETURN
                       && !use_return_insn (TRUE, NULL)
                       && !optimize_size)
                fail = TRUE;
              else if (GET_CODE (scanbody) == RETURN
                       && seeking_return)
                {
                  arm_ccfsm_state = 2;
                  succeed = TRUE;
                }
              else if (GET_CODE (scanbody) == PARALLEL)
                {
                  switch (get_attr_conds (this_insn))
                    {
                    case CONDS_NOCOND:
                      break;
                    default:
                      fail = TRUE;
                      break;
                    }
                }
              else
                fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

              break;

            case INSN:
              /* Instructions using or affecting the condition codes make it
                 fail.  */
              scanbody = PATTERN (this_insn);
              if (!(GET_CODE (scanbody) == SET
                    || GET_CODE (scanbody) == PARALLEL)
                  || get_attr_conds (this_insn) != CONDS_NOCOND)
                fail = TRUE;

              /* A conditional cirrus instruction must be followed by
                 a non Cirrus instruction.  However, since we
                 conditionalize instructions in this function and by
                 the time we get here we can't add instructions
                 (nops), because shorten_branches() has already been
                 called, we will disable conditionalizing Cirrus
                 instructions to be safe.  */
              if (GET_CODE (scanbody) != USE
                  && GET_CODE (scanbody) != CLOBBER
                  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
                fail = TRUE;
              break;

            default:
              break;
            }
        }

      if (succeed)
        {
          if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
            arm_target_label = CODE_LABEL_NUMBER (label);
          else
            {
              gcc_assert (seeking_return || arm_ccfsm_state == 2);

              while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
                {
                  this_insn = next_nonnote_insn (this_insn);
                  gcc_assert (!this_insn
                              || (GET_CODE (this_insn) != BARRIER
                                  && GET_CODE (this_insn) != CODE_LABEL));
                }
              if (!this_insn)
                {
                  /* Oh, dear! we ran off the end.. give up.  */
                  extract_constrain_insn_cached (insn);
                  arm_ccfsm_state = 0;
                  arm_target_insn = NULL;
                  return;
                }
              arm_target_insn = this_insn;
            }

          /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
             what it was.  */
          if (!reverse)
            arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

          if (reverse || then_not_else)
            arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
        }

      /* Restore recog_data (getting the attributes of other insns can
         destroy this array, but final.c assumes that it remains intact
         across this call.  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
        buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
                  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
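
/* A worked (hypothetical) example of the mask handling above: for a
   block of two conditional insns where the first executes on EQ and
   the second on NE, arm_condexec_masklen is 2 and arm_condexec_mask
   is 0b01, so buff becomes "te" and the function emits

       ite   eq

   before the two instructions.  */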
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
            || (TARGET_HARD_FLOAT && TARGET_VFP
                && regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
      && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64bits-- aldyh.  */
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
        return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
        return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
         putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
        return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
        return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
               || (VALID_NEON_QREG_MODE (mode)
                   && NEON_REGNO_OK_FOR_QUAD (regno))
               || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
               || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
               || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
               || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
               || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
        return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
        return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  /* The only registers left are the FPA registers
     which we only allow to hold FP values.  */
  return (TARGET_HARD_FLOAT && TARGET_FPA
          && GET_MODE_CLASS (mode) == MODE_FLOAT
          && regno >= FIRST_FPA_REGNUM
          && regno <= LAST_FPA_REGNUM);
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_CIRRUS_REGNUM (regno))
    return CIRRUS_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
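/* For example (illustrative): in ARM mode r0-r15 all map to GENERAL_REGS,
   whereas in Thumb-2 r0-r7 map to LO_REGS and r8 onwards to HI_REGS,
   matching the encoding restrictions of the 16-bit instruction forms.  */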
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */

int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (GET_CODE (addr) != REG)
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address
       of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0))
	     == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT)
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
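/* In RTL terms (illustrative sketch), the loop above is looking for an
   insn of the shape

       (set (reg Rn) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int N)))

   where Rn is the register holding the argument's address; N then becomes
   the offset reported to the debugger.  */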
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	add_builtin_function ((NAME), (TYPE), (CODE),			\
			      BUILT_IN_MD, NULL, NULL_TREE);		\
    }									\
  while (0)
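/* For instance (illustrative expansion), a use such as

       def_mbuiltin (FL_IWMMXT, "__builtin_arm_waddb",
		     v8qi_ftype_v8qi_v8qi, ARM_BUILTIN_WADDB);

   registers the builtin only when the target's insn_flags include
   FL_IWMMXT, and is a no-op otherwise.  */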
struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};
static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)

#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
  IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
  IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
  IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
  IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
  IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
  IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
  IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
  IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
  IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
  IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
  IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
  IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
  IWMMXT_BUILTIN2 (rordi3_di, WRORD)
  IWMMXT_BUILTIN2 (rordi3, WRORDI)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};
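/* Each IWMMXT_BUILTIN entry above expands (illustratively) to an
   initializer such as

       { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
	 ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying the builtin's name and code to the named insn pattern.  */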
static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
/* Set up all the iWMMXt builtins.  This is
   not called if TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree int_ftype_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, long_long_integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 tree_cons (NULL_TREE,
							    integer_type_node,
							    endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE,
						 long_long_integer_type_node,
						 endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
			   tree_cons (NULL_TREE, integer_type_node,
				      tree_cons (NULL_TREE, integer_type_node,
						 endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      endlink));
  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE,
				      long_long_unsigned_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 tree_cons (NULL_TREE,
							    V4HI_type_node,
							    endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
			   tree_cons (NULL_TREE, V8QI_type_node,
				      tree_cons (NULL_TREE, V8QI_type_node,
						 endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
			   tree_cons (NULL_TREE, V4HI_type_node,
				      tree_cons (NULL_TREE, V4HI_type_node,
						 endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
			   tree_cons (NULL_TREE, V2SI_type_node,
				      tree_cons (NULL_TREE, V2SI_type_node,
						 endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
			   tree_cons (NULL_TREE, long_long_unsigned_type_node,
				      tree_cons (NULL_TREE,
						 long_long_unsigned_type_node,
						 endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }
  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
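/* The builtin registered above lets user code fetch the TLS thread
   pointer directly, e.g. (illustrative):

       void *tp = __builtin_thread_pointer ();

   The nothrow and readonly bits allow the optimizers to CSE such calls.  */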
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

/* The number of type-bit variants above.  */
#define T_MAX 13
typedef enum {
  NEON_BINOP,
  NEON_TERNOP,
  NEON_UNOP,
  NEON_GETLANE,
  NEON_SETLANE,
  NEON_CREATE,
  NEON_DUP,
  NEON_DUPLANE,
  NEON_COMBINE,
  NEON_SPLIT,
  NEON_LANEMUL,
  NEON_LANEMULL,
  NEON_LANEMULH,
  NEON_LANEMAC,
  NEON_SCALARMUL,
  NEON_SCALARMULL,
  NEON_SCALARMULH,
  NEON_SCALARMAC,
  NEON_CONVERT,
  NEON_FIXCONV,
  NEON_SELECT,
  NEON_RESULTPAIR,
  NEON_REINTERP,
  NEON_VTBL,
  NEON_VTBX,
  NEON_LOAD1,
  NEON_LOAD1LANE,
  NEON_LOADSTRUCT,
  NEON_LOADSTRUCTLANE,
  NEON_STORE1,
  NEON_STORE1LANE,
  NEON_STORESTRUCT,
  NEON_STORESTRUCTLANE,
  NEON_LOGICBINOP,
  NEON_SHIFTINSERT,
  NEON_SHIFTIMM,
  NEON_SHIFTACC
} neon_itype;

typedef struct {
  const char *name;
  const neon_itype itype;
  const int bits;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
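/* As an illustrative expansion, VAR2 (BINOP, vcage, v2sf, v4sf) produces

       "vcage", NEON_BINOP, T_V2SF | T_V4SF,
       { CODE_FOR_neon_vcagev2sf, CODE_FOR_neon_vcagev4sf }, 2, 0

   i.e. one table entry covering both vector widths of the insn.  */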
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.) The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */

static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorr,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (BINOP, veor,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vbic,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOGICBINOP, vorn,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
};
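/* Each entry above yields one builtin per listed mode, named by
   concatenation in arm_init_neon_builtins below; e.g. (illustrative) the
   vadd entry produces __builtin_neon_vaddv8qi, __builtin_neon_vaddv4hi,
   ..., __builtin_neon_vaddv2di.  */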
static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node;
  tree neon_intHI_type_node;
  tree neon_polyQI_type_node;
  tree neon_polyHI_type_node;
  tree neon_intSI_type_node;
  tree neon_intDI_type_node;
  tree neon_float_type_node;

  tree intQI_pointer_node;
  tree intHI_pointer_node;
  tree intSI_pointer_node;
  tree intDI_pointer_node;
  tree float_pointer_node;

  tree const_intQI_node;
  tree const_intHI_node;
  tree const_intSI_node;
  tree const_intDI_node;
  tree const_float_node;

  tree const_intQI_pointer_node;
  tree const_intHI_pointer_node;
  tree const_intSI_pointer_node;
  tree const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node;
  tree V4HI_type_node;
  tree V2SI_type_node;
  tree V2SF_type_node;
  tree V16QI_type_node;
  tree V8HI_type_node;
  tree V4SI_type_node;
  tree V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node;
  tree intUHI_type_node;
  tree intUSI_type_node;
  tree intUDI_type_node;

  tree intEI_type_node;
  tree intOI_type_node;
  tree intCI_type_node;
  tree intXI_type_node;

  tree V8QI_pointer_node;
  tree V4HI_pointer_node;
  tree V2SI_pointer_node;
  tree V2SF_pointer_node;
  tree V16QI_pointer_node;
  tree V8HI_pointer_node;
  tree V4SI_pointer_node;
  tree V4SF_pointer_node;
  tree V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi;
  tree void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si;
  tree void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di;
  tree void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi;
  tree void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf;
  tree void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5];
  tree reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];
  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing vector
     types on.  If you change these names you'll need to change
     the table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);

  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);
  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
			      V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
			      V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
			      V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
			      V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
			      V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
			      V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
			      V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
			      V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;
  for (i = 0; i < 5; i++)
    {
      int j;
      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }
  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
	{
	  const char* const modenames[] = {
	    "v8qi", "v4hi", "v2si", "v2sf", "di",
	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
	  };
	  char namebuf[60];
	  tree ftype = NULL;
	  enum insn_code icode;
	  int is_load = 0, is_store = 0;

	  if ((d->bits & (1 << j)) == 0)
	    continue;

	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1:
	    case NEON_LOAD1LANE:
	    case NEON_LOADSTRUCT:
	    case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1:
	    case NEON_STORE1LANE:
	    case NEON_STORESTRUCT:
	    case NEON_STORESTRUCTLANE:
	      if (!is_load)
		is_store = 1;
	      /* Fall through.  */
	    case NEON_UNOP:
	    case NEON_BINOP:
	    case NEON_LOGICBINOP:
	    case NEON_SHIFTINSERT:
	    case NEON_TERNOP:
	    case NEON_GETLANE:
	    case NEON_SETLANE:
	    case NEON_CREATE:
	    case NEON_DUP:
	    case NEON_DUPLANE:
	    case NEON_SHIFTIMM:
	    case NEON_SHIFTACC:
	    case NEON_COMBINE:
	    case NEON_SPLIT:
	    case NEON_CONVERT:
	    case NEON_FIXCONV:
	    case NEON_LANEMUL:
	    case NEON_LANEMULL:
	    case NEON_LANEMULH:
	    case NEON_LANEMAC:
	    case NEON_SCALARMUL:
	    case NEON_SCALARMULL:
	    case NEON_SCALARMULH:
	    case NEON_SCALARMAC:
	    case NEON_SELECT:
	    case NEON_VTBL:
	    case NEON_VTBX:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;

		/* Build a function type directly from the insn_data for this
		   builtin.  The build_function_type() function takes care of
		   removing duplicates for us.  */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
			/* Neon load patterns always have the memory operand
			   (a SImode pointer) in the operand 1 position.  We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = const_intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = const_intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = const_intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = const_float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = const_intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
			/* Similarly, Neon store patterns use operand 0 as
			   the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI:
			  case T_V16QI:
			    eltype = intQI_pointer_node;
			    break;

			  case T_V4HI:
			  case T_V8HI:
			    eltype = intHI_pointer_node;
			    break;

			  case T_V2SI:
			  case T_V4SI:
			    eltype = intSI_pointer_node;
			    break;

			  case T_V2SF:
			  case T_V4SF:
			    eltype = float_pointer_node;
			    break;

			  case T_DI:
			  case T_V2DI:
			    eltype = intDI_pointer_node;
			    break;

			  default: gcc_unreachable ();
			  }
		      }
		    else
		      {
			switch (insn_data[icode].operand[k].mode)
			  {
			  case VOIDmode: eltype = void_type_node; break;
			  /* Scalars.  */
			  case QImode: eltype = neon_intQI_type_node; break;
			  case HImode: eltype = neon_intHI_type_node; break;
			  case SImode: eltype = neon_intSI_type_node; break;
			  case SFmode: eltype = neon_float_type_node; break;
			  case DImode: eltype = neon_intDI_type_node; break;
			  case TImode: eltype = intTI_type_node; break;
			  case EImode: eltype = intEI_type_node; break;
			  case OImode: eltype = intOI_type_node; break;
			  case CImode: eltype = intCI_type_node; break;
			  case XImode: eltype = intXI_type_node; break;
			  /* 64-bit vectors.  */
			  case V8QImode: eltype = V8QI_type_node; break;
			  case V4HImode: eltype = V4HI_type_node; break;
			  case V2SImode: eltype = V2SI_type_node; break;
			  case V2SFmode: eltype = V2SF_type_node; break;
			  /* 128-bit vectors.  */
			  case V16QImode: eltype = V16QI_type_node; break;
			  case V8HImode: eltype = V8HI_type_node; break;
			  case V4SImode: eltype = V4SI_type_node; break;
			  case V4SFmode: eltype = V4SF_type_node; break;
			  case V2DImode: eltype = V2DI_type_node; break;
			  default: gcc_unreachable ();
			  }
		      }

		    if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
		  case DImode: ftype = void_ftype_pdi_di_di; break;
		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  gcc_assert (ftype != NULL);

	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}
static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}
static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}
/* Implement TARGET_INVALID_PARAMETER_TYPE.  */

static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */

static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */

static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 and double to go through an intermediate conversion
   to float.  */

static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
19109 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
19110 This simply adds HFmode as a supported mode; even though we don't
19111 implement arithmetic on this type directly, it's supported by
19112 optabs conversions, much the way the double-word arithmetic is
19113 special-cased in the default hook. */
19116 arm_scalar_mode_supported_p (enum machine_mode mode
)
19118 if (mode
== HFmode
)
19119 return (arm_fp16_format
!= ARM_FP16_FORMAT_NONE
);
19121 return default_scalar_mode_supported_p (mode
);
19124 /* Errors in the source file can cause expand_expr to return const0_rtx
19125 where we expect a vector. To avoid crashing, use one of the vector
19126 clear instructions. */
19129 safe_vector_operand (rtx x
, enum machine_mode mode
)
19131 if (x
!= const0_rtx
)
19133 x
= gen_reg_rtx (mode
);
19135 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
19136 : gen_rtx_SUBREG (DImode
, x
, 0)));
19140 /* Subroutine of arm_expand_builtin to take care of binop insns. */
19143 arm_expand_binop_builtin (enum insn_code icode
,
19144 tree exp
, rtx target
)
19147 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19148 tree arg1
= CALL_EXPR_ARG (exp
, 1);
19149 rtx op0
= expand_normal (arg0
);
19150 rtx op1
= expand_normal (arg1
);
19151 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19152 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19153 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
19155 if (VECTOR_MODE_P (mode0
))
19156 op0
= safe_vector_operand (op0
, mode0
);
19157 if (VECTOR_MODE_P (mode1
))
19158 op1
= safe_vector_operand (op1
, mode1
);
19161 || GET_MODE (target
) != tmode
19162 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19163 target
= gen_reg_rtx (tmode
);
19165 gcc_assert (GET_MODE (op0
) == mode0
&& GET_MODE (op1
) == mode1
);
19167 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19168 op0
= copy_to_mode_reg (mode0
, op0
);
19169 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19170 op1
= copy_to_mode_reg (mode1
, op1
);
19172 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19179 /* Subroutine of arm_expand_builtin to take care of unop insns. */
19182 arm_expand_unop_builtin (enum insn_code icode
,
19183 tree exp
, rtx target
, int do_load
)
19186 tree arg0
= CALL_EXPR_ARG (exp
, 0);
19187 rtx op0
= expand_normal (arg0
);
19188 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19189 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
19192 || GET_MODE (target
) != tmode
19193 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19194 target
= gen_reg_rtx (tmode
);
19196 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
19199 if (VECTOR_MODE_P (mode0
))
19200 op0
= safe_vector_operand (op0
, mode0
);
19202 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19203 op0
= copy_to_mode_reg (mode0
, op0
);
19206 pat
= GEN_FCN (icode
) (target
, op0
);
19214 neon_builtin_compare (const void *a
, const void *b
)
19216 const neon_builtin_datum
*const key
= (const neon_builtin_datum
*) a
;
19217 const neon_builtin_datum
*const memb
= (const neon_builtin_datum
*) b
;
19218 unsigned int soughtcode
= key
->base_fcode
;
19220 if (soughtcode
>= memb
->base_fcode
19221 && soughtcode
< memb
->base_fcode
+ memb
->num_vars
)
19223 else if (soughtcode
< memb
->base_fcode
)
19229 static enum insn_code
19230 locate_neon_builtin_icode (int fcode
, neon_itype
*itype
)
19232 neon_builtin_datum key
19233 = { NULL
, (neon_itype
) 0, 0, { CODE_FOR_nothing
}, 0, 0 };
19234 neon_builtin_datum
*found
;
19237 key
.base_fcode
= fcode
;
19238 found
= (neon_builtin_datum
*)
19239 bsearch (&key
, &neon_builtin_data
[0], ARRAY_SIZE (neon_builtin_data
),
19240 sizeof (neon_builtin_data
[0]), neon_builtin_compare
);
19241 gcc_assert (found
);
19242 idx
= fcode
- (int) found
->base_fcode
;
19243 gcc_assert (idx
>= 0 && idx
< T_MAX
&& idx
< (int)found
->num_vars
);
19246 *itype
= found
->itype
;
19248 return found
->codes
[idx
];
19252 NEON_ARG_COPY_TO_REG
,
19257 #define NEON_MAX_BUILTIN_ARGS 5
19259 /* Expand a Neon builtin. */
19261 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
19266 tree arg
[NEON_MAX_BUILTIN_ARGS
];
19267 rtx op
[NEON_MAX_BUILTIN_ARGS
];
19268 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
19269 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
19274 || GET_MODE (target
) != tmode
19275 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
19276 target
= gen_reg_rtx (tmode
);
19278 va_start (ap
, exp
);
19282 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
19284 if (thisarg
== NEON_ARG_STOP
)
19288 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
19289 op
[argc
] = expand_normal (arg
[argc
]);
19290 mode
[argc
] = insn_data
[icode
].operand
[argc
+ have_retval
].mode
;
19294 case NEON_ARG_COPY_TO_REG
:
19295 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
19296 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
19297 (op
[argc
], mode
[argc
]))
19298 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
19301 case NEON_ARG_CONSTANT
:
19302 /* FIXME: This error message is somewhat unhelpful. */
19303 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
19304 (op
[argc
], mode
[argc
]))
19305 error ("argument must be a constant");
19308 case NEON_ARG_STOP
:
19309 gcc_unreachable ();
19322 pat
= GEN_FCN (icode
) (target
, op
[0]);
19326 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
19330 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
19334 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
19338 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
19342 gcc_unreachable ();
19348 pat
= GEN_FCN (icode
) (op
[0]);
19352 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
19356 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
19360 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
19364 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
19368 gcc_unreachable ();
19379 /* Expand a Neon builtin. These are "special" because they don't have symbolic
19380 constants defined per-instruction or per instruction-variant. Instead, the
19381 required info is looked up in the table neon_builtin_data. */
19383 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
19386 enum insn_code icode
= locate_neon_builtin_icode (fcode
, &itype
);
19393 return arm_expand_neon_args (target
, icode
, 1, exp
,
19394 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19398 case NEON_SCALARMUL
:
19399 case NEON_SCALARMULL
:
19400 case NEON_SCALARMULH
:
19401 case NEON_SHIFTINSERT
:
19402 case NEON_LOGICBINOP
:
19403 return arm_expand_neon_args (target
, icode
, 1, exp
,
19404 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19408 return arm_expand_neon_args (target
, icode
, 1, exp
,
19409 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19410 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19414 case NEON_SHIFTIMM
:
19415 return arm_expand_neon_args (target
, icode
, 1, exp
,
19416 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
19420 return arm_expand_neon_args (target
, icode
, 1, exp
,
19421 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19425 case NEON_REINTERP
:
19426 return arm_expand_neon_args (target
, icode
, 1, exp
,
19427 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19431 return arm_expand_neon_args (target
, icode
, 1, exp
,
19432 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19434 case NEON_RESULTPAIR
:
19435 return arm_expand_neon_args (target
, icode
, 0, exp
,
19436 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19440 case NEON_LANEMULL
:
19441 case NEON_LANEMULH
:
19442 return arm_expand_neon_args (target
, icode
, 1, exp
,
19443 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19444 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19447 return arm_expand_neon_args (target
, icode
, 1, exp
,
19448 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19449 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19451 case NEON_SHIFTACC
:
19452 return arm_expand_neon_args (target
, icode
, 1, exp
,
19453 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19454 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19456 case NEON_SCALARMAC
:
19457 return arm_expand_neon_args (target
, icode
, 1, exp
,
19458 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19459 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
19463 return arm_expand_neon_args (target
, icode
, 1, exp
,
19464 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
19468 case NEON_LOADSTRUCT
:
19469 return arm_expand_neon_args (target
, icode
, 1, exp
,
19470 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19472 case NEON_LOAD1LANE
:
19473 case NEON_LOADSTRUCTLANE
:
19474 return arm_expand_neon_args (target
, icode
, 1, exp
,
19475 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19479 case NEON_STORESTRUCT
:
19480 return arm_expand_neon_args (target
, icode
, 0, exp
,
19481 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
19483 case NEON_STORE1LANE
:
19484 case NEON_STORESTRUCTLANE
:
19485 return arm_expand_neon_args (target
, icode
, 0, exp
,
19486 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
19490 gcc_unreachable ();
19493 /* Emit code to reinterpret one Neon type as another, without altering bits. */
19495 neon_reinterpret (rtx dest
, rtx src
)
19497 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
19500 /* Emit code to place a Neon pair result in memory locations (with equal
19503 neon_emit_pair_result_insn (enum machine_mode mode
,
19504 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
19507 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
19508 rtx tmp1
= gen_reg_rtx (mode
);
19509 rtx tmp2
= gen_reg_rtx (mode
);
19511 emit_insn (intfn (tmp1
, op1
, tmp2
, op2
));
19513 emit_move_insn (mem
, tmp1
);
19514 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
19515 emit_move_insn (mem
, tmp2
);
19518 /* Set up operands for a register copy from src to dest, taking care not to
19519 clobber registers in the process.
19520 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
19521 be called with a large N, so that should be OK. */
19524 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
19526 unsigned int copied
= 0, opctr
= 0;
19527 unsigned int done
= (1 << count
) - 1;
19530 while (copied
!= done
)
19532 for (i
= 0; i
< count
; i
++)
19536 for (j
= 0; good
&& j
< count
; j
++)
19537 if (i
!= j
&& (copied
& (1 << j
)) == 0
19538 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
19543 operands
[opctr
++] = dest
[i
];
19544 operands
[opctr
++] = src
[i
];
19550 gcc_assert (opctr
== count
* 2);
19553 /* Expand an expression EXP that calls a built-in function,
19554 with result going to TARGET if that's convenient
19555 (and in mode MODE if that's convenient).
19556 SUBTARGET may be used as the target for computing one of EXP's operands.
19557 IGNORE is nonzero if the value is to be ignored. */
19560 arm_expand_builtin (tree exp
,
19562 rtx subtarget ATTRIBUTE_UNUSED
,
19563 enum machine_mode mode ATTRIBUTE_UNUSED
,
19564 int ignore ATTRIBUTE_UNUSED
)
19566 const struct builtin_description
* d
;
19567 enum insn_code icode
;
19568 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
19576 int fcode
= DECL_FUNCTION_CODE (fndecl
);
19578 enum machine_mode tmode
;
19579 enum machine_mode mode0
;
19580 enum machine_mode mode1
;
19581 enum machine_mode mode2
;
19583 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
19584 return arm_expand_neon_builtin (fcode
, exp
, target
);
19588 case ARM_BUILTIN_TEXTRMSB
:
19589 case ARM_BUILTIN_TEXTRMUB
:
19590 case ARM_BUILTIN_TEXTRMSH
:
19591 case ARM_BUILTIN_TEXTRMUH
:
19592 case ARM_BUILTIN_TEXTRMSW
:
19593 case ARM_BUILTIN_TEXTRMUW
:
19594 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
19595 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
19596 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
19597 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
19598 : CODE_FOR_iwmmxt_textrmw
);
19600 arg0
= CALL_EXPR_ARG (exp
, 0);
19601 arg1
= CALL_EXPR_ARG (exp
, 1);
19602 op0
= expand_normal (arg0
);
19603 op1
= expand_normal (arg1
);
19604 tmode
= insn_data
[icode
].operand
[0].mode
;
19605 mode0
= insn_data
[icode
].operand
[1].mode
;
19606 mode1
= insn_data
[icode
].operand
[2].mode
;
19608 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19609 op0
= copy_to_mode_reg (mode0
, op0
);
19610 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19612 /* @@@ better error message */
19613 error ("selector must be an immediate");
19614 return gen_reg_rtx (tmode
);
19617 || GET_MODE (target
) != tmode
19618 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19619 target
= gen_reg_rtx (tmode
);
19620 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19626 case ARM_BUILTIN_TINSRB
:
19627 case ARM_BUILTIN_TINSRH
:
19628 case ARM_BUILTIN_TINSRW
:
19629 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
19630 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
19631 : CODE_FOR_iwmmxt_tinsrw
);
19632 arg0
= CALL_EXPR_ARG (exp
, 0);
19633 arg1
= CALL_EXPR_ARG (exp
, 1);
19634 arg2
= CALL_EXPR_ARG (exp
, 2);
19635 op0
= expand_normal (arg0
);
19636 op1
= expand_normal (arg1
);
19637 op2
= expand_normal (arg2
);
19638 tmode
= insn_data
[icode
].operand
[0].mode
;
19639 mode0
= insn_data
[icode
].operand
[1].mode
;
19640 mode1
= insn_data
[icode
].operand
[2].mode
;
19641 mode2
= insn_data
[icode
].operand
[3].mode
;
19643 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19644 op0
= copy_to_mode_reg (mode0
, op0
);
19645 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19646 op1
= copy_to_mode_reg (mode1
, op1
);
19647 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19649 /* @@@ better error message */
19650 error ("selector must be an immediate");
19654 || GET_MODE (target
) != tmode
19655 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19656 target
= gen_reg_rtx (tmode
);
19657 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19663 case ARM_BUILTIN_SETWCX
:
19664 arg0
= CALL_EXPR_ARG (exp
, 0);
19665 arg1
= CALL_EXPR_ARG (exp
, 1);
19666 op0
= force_reg (SImode
, expand_normal (arg0
));
19667 op1
= expand_normal (arg1
);
19668 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
19671 case ARM_BUILTIN_GETWCX
:
19672 arg0
= CALL_EXPR_ARG (exp
, 0);
19673 op0
= expand_normal (arg0
);
19674 target
= gen_reg_rtx (SImode
);
19675 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
19678 case ARM_BUILTIN_WSHUFH
:
19679 icode
= CODE_FOR_iwmmxt_wshufh
;
19680 arg0
= CALL_EXPR_ARG (exp
, 0);
19681 arg1
= CALL_EXPR_ARG (exp
, 1);
19682 op0
= expand_normal (arg0
);
19683 op1
= expand_normal (arg1
);
19684 tmode
= insn_data
[icode
].operand
[0].mode
;
19685 mode1
= insn_data
[icode
].operand
[1].mode
;
19686 mode2
= insn_data
[icode
].operand
[2].mode
;
19688 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
19689 op0
= copy_to_mode_reg (mode1
, op0
);
19690 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
19692 /* @@@ better error message */
19693 error ("mask must be an immediate");
19697 || GET_MODE (target
) != tmode
19698 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19699 target
= gen_reg_rtx (tmode
);
19700 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
19706 case ARM_BUILTIN_WSADB
:
19707 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
19708 case ARM_BUILTIN_WSADH
:
19709 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
19710 case ARM_BUILTIN_WSADBZ
:
19711 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
19712 case ARM_BUILTIN_WSADHZ
:
19713 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
19715 /* Several three-argument builtins. */
19716 case ARM_BUILTIN_WMACS
:
19717 case ARM_BUILTIN_WMACU
:
19718 case ARM_BUILTIN_WALIGN
:
19719 case ARM_BUILTIN_TMIA
:
19720 case ARM_BUILTIN_TMIAPH
:
19721 case ARM_BUILTIN_TMIATT
:
19722 case ARM_BUILTIN_TMIATB
:
19723 case ARM_BUILTIN_TMIABT
:
19724 case ARM_BUILTIN_TMIABB
:
19725 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
19726 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
19727 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
19728 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
19729 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
19730 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
19731 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
19732 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
19733 : CODE_FOR_iwmmxt_walign
);
19734 arg0
= CALL_EXPR_ARG (exp
, 0);
19735 arg1
= CALL_EXPR_ARG (exp
, 1);
19736 arg2
= CALL_EXPR_ARG (exp
, 2);
19737 op0
= expand_normal (arg0
);
19738 op1
= expand_normal (arg1
);
19739 op2
= expand_normal (arg2
);
19740 tmode
= insn_data
[icode
].operand
[0].mode
;
19741 mode0
= insn_data
[icode
].operand
[1].mode
;
19742 mode1
= insn_data
[icode
].operand
[2].mode
;
19743 mode2
= insn_data
[icode
].operand
[3].mode
;
19745 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
19746 op0
= copy_to_mode_reg (mode0
, op0
);
19747 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
19748 op1
= copy_to_mode_reg (mode1
, op1
);
19749 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
19750 op2
= copy_to_mode_reg (mode2
, op2
);
19752 || GET_MODE (target
) != tmode
19753 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
19754 target
= gen_reg_rtx (tmode
);
19755 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
19761 case ARM_BUILTIN_WZERO
:
19762 target
= gen_reg_rtx (DImode
);
19763 emit_insn (gen_iwmmxt_clrdi (target
));
19766 case ARM_BUILTIN_THREAD_POINTER
:
19767 return arm_load_tp (target
);
19773 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
19774 if (d
->code
== (const enum arm_builtins
) fcode
)
19775 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
19777 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
19778 if (d
->code
== (const enum arm_builtins
) fcode
)
19779 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
19781 /* @@@ Should really do something sensible here. */
19785 /* Return the number (counting from 0) of
19786 the least significant set bit in MASK. */
19789 number_of_first_bit_set (unsigned mask
)
19794 (mask
& (1 << bit
)) == 0;
19801 /* Emit code to push or pop registers to or from the stack. F is the
19802 assembly file. MASK is the registers to push or pop. PUSH is
19803 nonzero if we should push, and zero if we should pop. For debugging
19804 output, if pushing, adjust CFA_OFFSET by the amount of space added
19805 to the stack. REAL_REGS should have the same number of bits set as
19806 MASK, and will be used instead (in the same order) to describe which
19807 registers were saved - this is used to mark the save slots when we
19808 push high registers after moving them to low registers. */
19810 thumb_pushpop (FILE *f
, unsigned long mask
, int push
, int *cfa_offset
,
19811 unsigned long real_regs
)
19814 int lo_mask
= mask
& 0xFF;
19815 int pushed_words
= 0;
19819 if (lo_mask
== 0 && !push
&& (mask
& (1 << PC_REGNUM
)))
19821 /* Special case. Do not generate a POP PC statement here, do it in
19823 thumb_exit (f
, -1);
19827 if (push
&& arm_except_unwind_info (&global_options
) == UI_TARGET
)
19829 fprintf (f
, "\t.save\t{");
19830 for (regno
= 0; regno
< 15; regno
++)
19832 if (real_regs
& (1 << regno
))
19834 if (real_regs
& ((1 << regno
) -1))
19836 asm_fprintf (f
, "%r", regno
);
19839 fprintf (f
, "}\n");
19842 fprintf (f
, "\t%s\t{", push
? "push" : "pop");
19844 /* Look at the low registers first. */
19845 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
19849 asm_fprintf (f
, "%r", regno
);
19851 if ((lo_mask
& ~1) != 0)
19858 if (push
&& (mask
& (1 << LR_REGNUM
)))
19860 /* Catch pushing the LR. */
19864 asm_fprintf (f
, "%r", LR_REGNUM
);
19868 else if (!push
&& (mask
& (1 << PC_REGNUM
)))
19870 /* Catch popping the PC. */
19871 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
19872 || crtl
->calls_eh_return
)
19874 /* The PC is never poped directly, instead
19875 it is popped into r3 and then BX is used. */
19876 fprintf (f
, "}\n");
19878 thumb_exit (f
, -1);
19887 asm_fprintf (f
, "%r", PC_REGNUM
);
19891 fprintf (f
, "}\n");
19893 if (push
&& pushed_words
&& dwarf2out_do_frame ())
19895 char *l
= dwarf2out_cfi_label (false);
19896 int pushed_mask
= real_regs
;
19898 *cfa_offset
+= pushed_words
* 4;
19899 dwarf2out_def_cfa (l
, SP_REGNUM
, *cfa_offset
);
19902 pushed_mask
= real_regs
;
19903 for (regno
= 0; regno
<= 14; regno
++, pushed_mask
>>= 1)
19905 if (pushed_mask
& 1)
19906 dwarf2out_reg_save (l
, regno
, 4 * pushed_words
++ - *cfa_offset
);
19911 /* Generate code to return from a thumb function.
19912 If 'reg_containing_return_addr' is -1, then the return address is
19913 actually on the stack, at the stack pointer. */
19915 thumb_exit (FILE *f
, int reg_containing_return_addr
)
19917 unsigned regs_available_for_popping
;
19918 unsigned regs_to_pop
;
19920 unsigned available
;
19924 int restore_a4
= FALSE
;
19926 /* Compute the registers we need to pop. */
19930 if (reg_containing_return_addr
== -1)
19932 regs_to_pop
|= 1 << LR_REGNUM
;
19936 if (TARGET_BACKTRACE
)
19938 /* Restore the (ARM) frame pointer and stack pointer. */
19939 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
19943 /* If there is nothing to pop then just emit the BX instruction and
19945 if (pops_needed
== 0)
19947 if (crtl
->calls_eh_return
)
19948 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
19950 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
19953 /* Otherwise if we are not supporting interworking and we have not created
19954 a backtrace structure and the function was not entered in ARM mode then
19955 just pop the return address straight into the PC. */
19956 else if (!TARGET_INTERWORK
19957 && !TARGET_BACKTRACE
19958 && !is_called_in_ARM_mode (current_function_decl
)
19959 && !crtl
->calls_eh_return
)
19961 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
19965 /* Find out how many of the (return) argument registers we can corrupt. */
19966 regs_available_for_popping
= 0;
19968 /* If returning via __builtin_eh_return, the bottom three registers
19969 all contain information needed for the return. */
19970 if (crtl
->calls_eh_return
)
19974 /* If we can deduce the registers used from the function's
19975 return value. This is more reliable that examining
19976 df_regs_ever_live_p () because that will be set if the register is
19977 ever used in the function, not just if the register is used
19978 to hold a return value. */
19980 if (crtl
->return_rtx
!= 0)
19981 mode
= GET_MODE (crtl
->return_rtx
);
19983 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
19985 size
= GET_MODE_SIZE (mode
);
19989 /* In a void function we can use any argument register.
19990 In a function that returns a structure on the stack
19991 we can use the second and third argument registers. */
19992 if (mode
== VOIDmode
)
19993 regs_available_for_popping
=
19994 (1 << ARG_REGISTER (1))
19995 | (1 << ARG_REGISTER (2))
19996 | (1 << ARG_REGISTER (3));
19998 regs_available_for_popping
=
19999 (1 << ARG_REGISTER (2))
20000 | (1 << ARG_REGISTER (3));
20002 else if (size
<= 4)
20003 regs_available_for_popping
=
20004 (1 << ARG_REGISTER (2))
20005 | (1 << ARG_REGISTER (3));
20006 else if (size
<= 8)
20007 regs_available_for_popping
=
20008 (1 << ARG_REGISTER (3));
20011 /* Match registers to be popped with registers into which we pop them. */
20012 for (available
= regs_available_for_popping
,
20013 required
= regs_to_pop
;
20014 required
!= 0 && available
!= 0;
20015 available
&= ~(available
& - available
),
20016 required
&= ~(required
& - required
))
20019 /* If we have any popping registers left over, remove them. */
20021 regs_available_for_popping
&= ~available
;
20023 /* Otherwise if we need another popping register we can use
20024 the fourth argument register. */
20025 else if (pops_needed
)
20027 /* If we have not found any free argument registers and
20028 reg a4 contains the return address, we must move it. */
20029 if (regs_available_for_popping
== 0
20030 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
20032 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20033 reg_containing_return_addr
= LR_REGNUM
;
20035 else if (size
> 12)
20037 /* Register a4 is being used to hold part of the return value,
20038 but we have dire need of a free, low register. */
20041 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
20044 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
20046 /* The fourth argument register is available. */
20047 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
20053 /* Pop as many registers as we can. */
20054 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20055 regs_available_for_popping
);
20057 /* Process the registers we popped. */
20058 if (reg_containing_return_addr
== -1)
20060 /* The return address was popped into the lowest numbered register. */
20061 regs_to_pop
&= ~(1 << LR_REGNUM
);
20063 reg_containing_return_addr
=
20064 number_of_first_bit_set (regs_available_for_popping
);
20066 /* Remove this register for the mask of available registers, so that
20067 the return address will not be corrupted by further pops. */
20068 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
20071 /* If we popped other registers then handle them here. */
20072 if (regs_available_for_popping
)
20076 /* Work out which register currently contains the frame pointer. */
20077 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20079 /* Move it into the correct place. */
20080 asm_fprintf (f
, "\tmov\t%r, %r\n",
20081 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
20083 /* (Temporarily) remove it from the mask of popped registers. */
20084 regs_available_for_popping
&= ~(1 << frame_pointer
);
20085 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
20087 if (regs_available_for_popping
)
20091 /* We popped the stack pointer as well,
20092 find the register that contains it. */
20093 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
20095 /* Move it into the stack register. */
20096 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
20098 /* At this point we have popped all necessary registers, so
20099 do not worry about restoring regs_available_for_popping
20100 to its correct value:
20102 assert (pops_needed == 0)
20103 assert (regs_available_for_popping == (1 << frame_pointer))
20104 assert (regs_to_pop == (1 << STACK_POINTER)) */
20108 /* Since we have just move the popped value into the frame
20109 pointer, the popping register is available for reuse, and
20110 we know that we still have the stack pointer left to pop. */
20111 regs_available_for_popping
|= (1 << frame_pointer
);
20115 /* If we still have registers left on the stack, but we no longer have
20116 any registers into which we can pop them, then we must move the return
20117 address into the link register and make available the register that
20119 if (regs_available_for_popping
== 0 && pops_needed
> 0)
20121 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
20123 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
20124 reg_containing_return_addr
);
20126 reg_containing_return_addr
= LR_REGNUM
;
20129 /* If we have registers left on the stack then pop some more.
20130 We know that at most we will want to pop FP and SP. */
20131 if (pops_needed
> 0)
20136 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20137 regs_available_for_popping
);
20139 /* We have popped either FP or SP.
20140 Move whichever one it is into the correct register. */
20141 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20142 move_to
= number_of_first_bit_set (regs_to_pop
);
20144 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
20146 regs_to_pop
&= ~(1 << move_to
);
20151 /* If we still have not popped everything then we must have only
20152 had one register available to us and we are now popping the SP. */
20153 if (pops_needed
> 0)
20157 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
20158 regs_available_for_popping
);
20160 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
20162 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
20164 assert (regs_to_pop == (1 << STACK_POINTER))
20165 assert (pops_needed == 1)
20169 /* If necessary restore the a4 register. */
20172 if (reg_containing_return_addr
!= LR_REGNUM
)
20174 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
20175 reg_containing_return_addr
= LR_REGNUM
;
20178 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
20181 if (crtl
->calls_eh_return
)
20182 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
20184 /* Return to caller. */
20185 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
20188 /* Scan INSN just before assembler is output for it.
20189 For Thumb-1, we track the status of the condition codes; this
20190 information is used in the cbranchsi4_insn pattern. */
20192 thumb1_final_prescan_insn (rtx insn
)
20194 if (flag_print_asm_name
)
20195 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
20196 INSN_ADDRESSES (INSN_UID (insn
)));
20197 /* Don't overwrite the previous setter when we get to a cbranch. */
20198 if (INSN_CODE (insn
) != CODE_FOR_cbranchsi4_insn
)
20200 enum attr_conds conds
;
20202 if (cfun
->machine
->thumb1_cc_insn
)
20204 if (modified_in_p (cfun
->machine
->thumb1_cc_op0
, insn
)
20205 || modified_in_p (cfun
->machine
->thumb1_cc_op1
, insn
))
20208 conds
= get_attr_conds (insn
);
20209 if (conds
== CONDS_SET
)
20211 rtx set
= single_set (insn
);
20212 cfun
->machine
->thumb1_cc_insn
= insn
;
20213 cfun
->machine
->thumb1_cc_op0
= SET_DEST (set
);
20214 cfun
->machine
->thumb1_cc_op1
= const0_rtx
;
20215 cfun
->machine
->thumb1_cc_mode
= CC_NOOVmode
;
20216 if (INSN_CODE (insn
) == CODE_FOR_thumb1_subsi3_insn
)
20218 rtx src1
= XEXP (SET_SRC (set
), 1);
20219 if (src1
== const0_rtx
)
20220 cfun
->machine
->thumb1_cc_mode
= CCmode
;
20223 else if (conds
!= CONDS_NOCOND
)
20224 cfun
->machine
->thumb1_cc_insn
= NULL_RTX
;
20229 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
20231 unsigned HOST_WIDE_INT mask
= 0xff;
20234 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
20235 if (val
== 0) /* XXX */
20238 for (i
= 0; i
< 25; i
++)
20239 if ((val
& (mask
<< i
)) == val
)
20245 /* Returns nonzero if the current function contains,
20246 or might contain a far jump. */
20248 thumb_far_jump_used_p (void)
20252 /* This test is only important for leaf functions. */
20253 /* assert (!leaf_function_p ()); */
20255 /* If we have already decided that far jumps may be used,
20256 do not bother checking again, and always return true even if
20257 it turns out that they are not being used. Once we have made
20258 the decision that far jumps are present (and that hence the link
20259 register will be pushed onto the stack) we cannot go back on it. */
20260 if (cfun
->machine
->far_jump_used
)
20263 /* If this function is not being called from the prologue/epilogue
20264 generation code then it must be being called from the
20265 INITIAL_ELIMINATION_OFFSET macro. */
20266 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
20268 /* In this case we know that we are being asked about the elimination
20269 of the arg pointer register. If that register is not being used,
20270 then there are no arguments on the stack, and we do not have to
20271 worry that a far jump might force the prologue to push the link
20272 register, changing the stack offsets. In this case we can just
20273 return false, since the presence of far jumps in the function will
20274 not affect stack offsets.
20276 If the arg pointer is live (or if it was live, but has now been
20277 eliminated and so set to dead) then we do have to test to see if
20278 the function might contain a far jump. This test can lead to some
20279 false negatives, since before reload is completed, then length of
20280 branch instructions is not known, so gcc defaults to returning their
20281 longest length, which in turn sets the far jump attribute to true.
20283 A false negative will not result in bad code being generated, but it
20284 will result in a needless push and pop of the link register. We
20285 hope that this does not occur too often.
20287 If we need doubleword stack alignment this could affect the other
20288 elimination offsets so we can't risk getting it wrong. */
20289 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
20290 cfun
->machine
->arg_pointer_live
= 1;
20291 else if (!cfun
->machine
->arg_pointer_live
)
20295 /* Check to see if the function contains a branch
20296 insn with the far jump attribute set. */
20297 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
20299 if (GET_CODE (insn
) == JUMP_INSN
20300 /* Ignore tablejump patterns. */
20301 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
20302 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
20303 && get_attr_far_jump (insn
) == FAR_JUMP_YES
20306 /* Record the fact that we have decided that
20307 the function does use far jumps. */
20308 cfun
->machine
->far_jump_used
= 1;
20316 /* Return nonzero if FUNC must be entered in ARM mode. */
20318 is_called_in_ARM_mode (tree func
)
20320 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
20322 /* Ignore the problem about functions whose address is taken. */
20323 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
20327 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
20333 /* Given the stack offsets and register mask in OFFSETS, decide how
20334 many additional registers to push instead of subtracting a constant
20335 from SP. For epilogues the principle is the same except we use pop.
20336 FOR_PROLOGUE indicates which we're generating. */
20338 thumb1_extra_regs_pushed (arm_stack_offsets
*offsets
, bool for_prologue
)
20340 HOST_WIDE_INT amount
;
20341 unsigned long live_regs_mask
= offsets
->saved_regs_mask
;
20342 /* Extract a mask of the ones we can give to the Thumb's push/pop
20344 unsigned long l_mask
= live_regs_mask
& (for_prologue
? 0x40ff : 0xff);
20345 /* Then count how many other high registers will need to be pushed. */
20346 unsigned long high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20347 int n_free
, reg_base
;
20349 if (!for_prologue
&& frame_pointer_needed
)
20350 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20352 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20354 /* If the stack frame size is 512 exactly, we can save one load
20355 instruction, which should make this a win even when optimizing
20357 if (!optimize_size
&& amount
!= 512)
20360 /* Can't do this if there are high registers to push. */
20361 if (high_regs_pushed
!= 0)
20364 /* Shouldn't do it in the prologue if no registers would normally
20365 be pushed at all. In the epilogue, also allow it if we'll have
20366 a pop insn for the PC. */
20369 || TARGET_BACKTRACE
20370 || (live_regs_mask
& 1 << LR_REGNUM
) == 0
20371 || TARGET_INTERWORK
20372 || crtl
->args
.pretend_args_size
!= 0))
20375 /* Don't do this if thumb_expand_prologue wants to emit instructions
20376 between the push and the stack frame allocation. */
20378 && ((flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20379 || (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)))
20386 reg_base
= arm_size_return_regs () / UNITS_PER_WORD
;
20387 live_regs_mask
>>= reg_base
;
20390 while (reg_base
+ n_free
< 8 && !(live_regs_mask
& 1)
20391 && (for_prologue
|| call_used_regs
[reg_base
+ n_free
]))
20393 live_regs_mask
>>= 1;
20399 gcc_assert (amount
/ 4 * 4 == amount
);
20401 if (amount
>= 512 && (amount
- n_free
* 4) < 512)
20402 return (amount
- 508) / 4;
20403 if (amount
<= n_free
* 4)
20408 /* The bits which aren't usefully expanded as rtl. */
20410 thumb_unexpanded_epilogue (void)
20412 arm_stack_offsets
*offsets
;
20414 unsigned long live_regs_mask
= 0;
20415 int high_regs_pushed
= 0;
20417 int had_to_push_lr
;
20420 if (cfun
->machine
->return_used_this_function
!= 0)
20423 if (IS_NAKED (arm_current_func_type ()))
20426 offsets
= arm_get_frame_offsets ();
20427 live_regs_mask
= offsets
->saved_regs_mask
;
20428 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20430 /* If we can deduce the registers used from the function's return value.
20431 This is more reliable that examining df_regs_ever_live_p () because that
20432 will be set if the register is ever used in the function, not just if
20433 the register is used to hold a return value. */
20434 size
= arm_size_return_regs ();
20436 extra_pop
= thumb1_extra_regs_pushed (offsets
, false);
20439 unsigned long extra_mask
= (1 << extra_pop
) - 1;
20440 live_regs_mask
|= extra_mask
<< (size
/ UNITS_PER_WORD
);
20443 /* The prolog may have pushed some high registers to use as
20444 work registers. e.g. the testsuite file:
20445 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
20446 compiles to produce:
20447 push {r4, r5, r6, r7, lr}
20451 as part of the prolog. We have to undo that pushing here. */
20453 if (high_regs_pushed
)
20455 unsigned long mask
= live_regs_mask
& 0xff;
20458 /* The available low registers depend on the size of the value we are
20466 /* Oh dear! We have no low registers into which we can pop
20469 ("no low registers available for popping high registers");
20471 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
20472 if (live_regs_mask
& (1 << next_hi_reg
))
20475 while (high_regs_pushed
)
20477 /* Find lo register(s) into which the high register(s) can
20479 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20481 if (mask
& (1 << regno
))
20482 high_regs_pushed
--;
20483 if (high_regs_pushed
== 0)
20487 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
20489 /* Pop the values into the low register(s). */
20490 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
20492 /* Move the value(s) into the high registers. */
20493 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
20495 if (mask
& (1 << regno
))
20497 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
20500 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
20501 if (live_regs_mask
& (1 << next_hi_reg
))
20506 live_regs_mask
&= ~0x0f00;
20509 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
20510 live_regs_mask
&= 0xff;
20512 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
20514 /* Pop the return address into the PC. */
20515 if (had_to_push_lr
)
20516 live_regs_mask
|= 1 << PC_REGNUM
;
20518 /* Either no argument registers were pushed or a backtrace
20519 structure was created which includes an adjusted stack
20520 pointer, so just pop everything. */
20521 if (live_regs_mask
)
20522 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20525 /* We have either just popped the return address into the
20526 PC or it is was kept in LR for the entire function.
20527 Note that thumb_pushpop has already called thumb_exit if the
20528 PC was in the list. */
20529 if (!had_to_push_lr
)
20530 thumb_exit (asm_out_file
, LR_REGNUM
);
20534 /* Pop everything but the return address. */
20535 if (live_regs_mask
)
20536 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
20539 if (had_to_push_lr
)
20543 /* We have no free low regs, so save one. */
20544 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
20548 /* Get the return address into a temporary register. */
20549 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
20550 1 << LAST_ARG_REGNUM
);
20554 /* Move the return address to lr. */
20555 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
20557 /* Restore the low register. */
20558 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
20563 regno
= LAST_ARG_REGNUM
;
20568 /* Remove the argument registers that were pushed onto the stack. */
20569 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
20570 SP_REGNUM
, SP_REGNUM
,
20571 crtl
->args
.pretend_args_size
);
20573 thumb_exit (asm_out_file
, regno
);
20579 /* Functions to save and restore machine-specific function data. */
20580 static struct machine_function
*
20581 arm_init_machine_status (void)
20583 struct machine_function
*machine
;
20584 machine
= ggc_alloc_cleared_machine_function ();
20586 #if ARM_FT_UNKNOWN != 0
20587 machine
->func_type
= ARM_FT_UNKNOWN
;
20592 /* Return an RTX indicating where the return address to the
20593 calling function can be found. */
20595 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
20600 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
20603 /* Do anything needed before RTL is emitted for each function. */
20605 arm_init_expanders (void)
20607 /* Arrange to initialize and mark the machine per-function status. */
20608 init_machine_status
= arm_init_machine_status
;
20610 /* This is to stop the combine pass optimizing away the alignment
20611 adjustment of va_arg. */
20612 /* ??? It is claimed that this should not be necessary. */
20614 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
20618 /* Like arm_compute_initial_elimination offset. Simpler because there
20619 isn't an ABI specified frame pointer for Thumb. Instead, we set it
20620 to point at the base of the local variables after static stack
20621 space for a function has been allocated. */
20624 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
20626 arm_stack_offsets
*offsets
;
20628 offsets
= arm_get_frame_offsets ();
20632 case ARG_POINTER_REGNUM
:
20635 case STACK_POINTER_REGNUM
:
20636 return offsets
->outgoing_args
- offsets
->saved_args
;
20638 case FRAME_POINTER_REGNUM
:
20639 return offsets
->soft_frame
- offsets
->saved_args
;
20641 case ARM_HARD_FRAME_POINTER_REGNUM
:
20642 return offsets
->saved_regs
- offsets
->saved_args
;
20644 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20645 return offsets
->locals_base
- offsets
->saved_args
;
20648 gcc_unreachable ();
20652 case FRAME_POINTER_REGNUM
:
20655 case STACK_POINTER_REGNUM
:
20656 return offsets
->outgoing_args
- offsets
->soft_frame
;
20658 case ARM_HARD_FRAME_POINTER_REGNUM
:
20659 return offsets
->saved_regs
- offsets
->soft_frame
;
20661 case THUMB_HARD_FRAME_POINTER_REGNUM
:
20662 return offsets
->locals_base
- offsets
->soft_frame
;
20665 gcc_unreachable ();
20670 gcc_unreachable ();
20674 /* Generate the rest of a function's prologue. */
20676 thumb1_expand_prologue (void)
20680 HOST_WIDE_INT amount
;
20681 arm_stack_offsets
*offsets
;
20682 unsigned long func_type
;
20684 unsigned long live_regs_mask
;
20686 func_type
= arm_current_func_type ();
20688 /* Naked functions don't have prologues. */
20689 if (IS_NAKED (func_type
))
20692 if (IS_INTERRUPT (func_type
))
20694 error ("interrupt Service Routines cannot be coded in Thumb mode");
20698 offsets
= arm_get_frame_offsets ();
20699 live_regs_mask
= offsets
->saved_regs_mask
;
20700 /* Load the pic register before setting the frame pointer,
20701 so we can use r7 as a temporary work register. */
20702 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
20703 arm_load_pic_register (live_regs_mask
);
20705 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
20706 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
20707 stack_pointer_rtx
);
20709 if (flag_stack_usage
)
20710 current_function_static_stack_size
20711 = offsets
->outgoing_args
- offsets
->saved_args
;
20713 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20714 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, true);
20719 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20720 GEN_INT (- amount
)));
20721 RTX_FRAME_RELATED_P (insn
) = 1;
20727 /* The stack decrement is too big for an immediate value in a single
20728 insn. In theory we could issue multiple subtracts, but after
20729 three of them it becomes more space efficient to place the full
20730 value in the constant pool and load into a register. (Also the
20731 ARM debugger really likes to see only one stack decrement per
20732 function). So instead we look for a scratch register into which
20733 we can load the decrement, and then we subtract this from the
20734 stack pointer. Unfortunately on the thumb the only available
20735 scratch registers are the argument registers, and we cannot use
20736 these as they may hold arguments to the function. Instead we
20737 attempt to locate a call preserved register which is used by this
20738 function. If we can find one, then we know that it will have
20739 been pushed at the start of the prologue and so we can corrupt
20741 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
20742 if (live_regs_mask
& (1 << regno
))
20745 gcc_assert(regno
<= LAST_LO_REGNUM
);
20747 reg
= gen_rtx_REG (SImode
, regno
);
20749 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
20751 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
20752 stack_pointer_rtx
, reg
));
20753 RTX_FRAME_RELATED_P (insn
) = 1;
20754 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
20755 plus_constant (stack_pointer_rtx
,
20757 RTX_FRAME_RELATED_P (dwarf
) = 1;
20758 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
20762 if (frame_pointer_needed
)
20763 thumb_set_frame_pointer (offsets
);
20765 /* If we are profiling, make sure no instructions are scheduled before
20766 the call to mcount. Similarly if the user has requested no
20767 scheduling in the prolog. Similarly if we want non-call exceptions
20768 using the EABI unwinder, to prevent faulting instructions from being
20769 swapped with a stack adjustment. */
20770 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
20771 || (arm_except_unwind_info (&global_options
) == UI_TARGET
20772 && cfun
->can_throw_non_call_exceptions
))
20773 emit_insn (gen_blockage ());
20775 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
20776 if (live_regs_mask
& 0xff)
20777 cfun
->machine
->lr_save_eliminated
= 0;
20782 thumb1_expand_epilogue (void)
20784 HOST_WIDE_INT amount
;
20785 arm_stack_offsets
*offsets
;
20788 /* Naked functions don't have prologues. */
20789 if (IS_NAKED (arm_current_func_type ()))
20792 offsets
= arm_get_frame_offsets ();
20793 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
20795 if (frame_pointer_needed
)
20797 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
20798 amount
= offsets
->locals_base
- offsets
->saved_regs
;
20800 amount
-= 4 * thumb1_extra_regs_pushed (offsets
, false);
20802 gcc_assert (amount
>= 0);
20806 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
20807 GEN_INT (amount
)));
20810 /* r3 is always free in the epilogue. */
20811 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
20813 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
20814 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
20818 /* Emit a USE (stack_pointer_rtx), so that
20819 the stack adjustment will not be deleted. */
20820 emit_insn (gen_prologue_use (stack_pointer_rtx
));
20822 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
20823 emit_insn (gen_blockage ());
20825 /* Emit a clobber for each insn that will be restored in the epilogue,
20826 so that flow2 will get register lifetimes correct. */
20827 for (regno
= 0; regno
< 13; regno
++)
20828 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
20829 emit_clobber (gen_rtx_REG (SImode
, regno
));
20831 if (! df_regs_ever_live_p (LR_REGNUM
))
20832 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
20836 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
20838 arm_stack_offsets
*offsets
;
20839 unsigned long live_regs_mask
= 0;
20840 unsigned long l_mask
;
20841 unsigned high_regs_pushed
= 0;
20842 int cfa_offset
= 0;
20845 if (IS_NAKED (arm_current_func_type ()))
20848 if (is_called_in_ARM_mode (current_function_decl
))
20852 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
20853 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
20855 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
20857 /* Generate code sequence to switch us into Thumb mode. */
20858 /* The .code 32 directive has already been emitted by
20859 ASM_DECLARE_FUNCTION_NAME. */
20860 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
20861 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
20863 /* Generate a label, so that the debugger will notice the
20864 change in instruction sets. This label is also used by
20865 the assembler to bypass the ARM code when this function
20866 is called from a Thumb encoded function elsewhere in the
20867 same file. Hence the definition of STUB_NAME here must
20868 agree with the definition in gas/config/tc-arm.c. */
20870 #define STUB_NAME ".real_start_of"
20872 fprintf (f
, "\t.code\t16\n");
20874 if (arm_dllexport_name_p (name
))
20875 name
= arm_strip_name_encoding (name
);
20877 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
20878 fprintf (f
, "\t.thumb_func\n");
20879 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
20882 if (crtl
->args
.pretend_args_size
)
20884 /* Output unwind directive for the stack adjustment. */
20885 if (arm_except_unwind_info (&global_options
) == UI_TARGET
)
20886 fprintf (f
, "\t.pad #%d\n",
20887 crtl
->args
.pretend_args_size
);
20889 if (cfun
->machine
->uses_anonymous_args
)
20893 fprintf (f
, "\tpush\t{");
20895 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
20897 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
20898 regno
<= LAST_ARG_REGNUM
;
20900 asm_fprintf (f
, "%r%s", regno
,
20901 regno
== LAST_ARG_REGNUM
? "" : ", ");
20903 fprintf (f
, "}\n");
20906 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
20907 SP_REGNUM
, SP_REGNUM
,
20908 crtl
->args
.pretend_args_size
);
20910 /* We don't need to record the stores for unwinding (would it
20911 help the debugger any if we did?), but record the change in
20912 the stack pointer. */
20913 if (dwarf2out_do_frame ())
20915 char *l
= dwarf2out_cfi_label (false);
20917 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
20918 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
20922 /* Get the registers we are going to push. */
20923 offsets
= arm_get_frame_offsets ();
20924 live_regs_mask
= offsets
->saved_regs_mask
;
20925 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
20926 l_mask
= live_regs_mask
& 0x40ff;
20927 /* Then count how many other high registers will need to be pushed. */
20928 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
20930 if (TARGET_BACKTRACE
)
20933 unsigned work_register
;
20935 /* We have been asked to create a stack backtrace structure.
20936 The code looks like this:
20940 0 sub SP, #16 Reserve space for 4 registers.
20941 2 push {R7} Push low registers.
20942 4 add R7, SP, #20 Get the stack pointer before the push.
20943 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
20944 8 mov R7, PC Get hold of the start of this code plus 12.
20945 10 str R7, [SP, #16] Store it.
20946 12 mov R7, FP Get hold of the current frame pointer.
20947 14 str R7, [SP, #4] Store it.
20948 16 mov R7, LR Get hold of the current return address.
20949 18 str R7, [SP, #12] Store it.
20950 20 add R7, SP, #16 Point at the start of the backtrace structure.
20951 22 mov FP, R7 Put this value into the frame pointer. */
20953 work_register
= thumb_find_work_register (live_regs_mask
);
20955 if (arm_except_unwind_info (&global_options
) == UI_TARGET
)
20956 asm_fprintf (f
, "\t.pad #16\n");
20959 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
20960 SP_REGNUM
, SP_REGNUM
);
20962 if (dwarf2out_do_frame ())
20964 char *l
= dwarf2out_cfi_label (false);
20966 cfa_offset
= cfa_offset
+ 16;
20967 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
20972 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
20973 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
20978 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
20979 offset
+ 16 + crtl
->args
.pretend_args_size
);
20981 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20984 /* Make sure that the instruction fetching the PC is in the right place
20985 to calculate "start of backtrace creation code + 12". */
20988 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
20989 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20991 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
20992 ARM_HARD_FRAME_POINTER_REGNUM
);
20993 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
20998 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
20999 ARM_HARD_FRAME_POINTER_REGNUM
);
21000 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21002 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
21003 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21007 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
21008 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
21010 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
21012 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
21013 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
21015 /* Optimization: If we are not pushing any low registers but we are going
21016 to push some high registers then delay our first push. This will just
21017 be a push of LR and we can combine it with the push of the first high
21019 else if ((l_mask
& 0xff) != 0
21020 || (high_regs_pushed
== 0 && l_mask
))
21022 unsigned long mask
= l_mask
;
21023 mask
|= (1 << thumb1_extra_regs_pushed (offsets
, true)) - 1;
21024 thumb_pushpop (f
, mask
, 1, &cfa_offset
, mask
);
21027 if (high_regs_pushed
)
21029 unsigned pushable_regs
;
21030 unsigned next_hi_reg
;
21032 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
21033 if (live_regs_mask
& (1 << next_hi_reg
))
21036 pushable_regs
= l_mask
& 0xff;
21038 if (pushable_regs
== 0)
21039 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
21041 while (high_regs_pushed
> 0)
21043 unsigned long real_regs_mask
= 0;
21045 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
21047 if (pushable_regs
& (1 << regno
))
21049 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
21051 high_regs_pushed
--;
21052 real_regs_mask
|= (1 << next_hi_reg
);
21054 if (high_regs_pushed
)
21056 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
21058 if (live_regs_mask
& (1 << next_hi_reg
))
21063 pushable_regs
&= ~((1 << regno
) - 1);
21069 /* If we had to find a work register and we have not yet
21070 saved the LR then add it to the list of regs to push. */
21071 if (l_mask
== (1 << LR_REGNUM
))
21073 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
21075 real_regs_mask
| (1 << LR_REGNUM
));
21079 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
/* Handle the case of a double word load into a low register from
   a computed memory address.  The computed address may involve a
   register which is overwritten by the load.  */
const char *
thumb_load_double_from_address (rtx *operands)
{
  rtx addr;
  rtx base;
  rtx offset;
  rtx arg1;
  rtx arg2;

  gcc_assert (GET_CODE (operands[0]) == REG);
  gcc_assert (GET_CODE (operands[1]) == MEM);

  /* Get the memory address.  */
  addr = XEXP (operands[1], 0);

  /* Work out how the memory address is computed.  */
  switch (GET_CODE (addr))
    {
    case REG:
      operands[2] = adjust_address (operands[1], SImode, 4);

      if (REGNO (operands[0]) == REGNO (addr))
	{
	  output_asm_insn ("ldr\t%H0, %2", operands);
	  output_asm_insn ("ldr\t%0, %1", operands);
	}
      else
	{
	  output_asm_insn ("ldr\t%0, %1", operands);
	  output_asm_insn ("ldr\t%H0, %2", operands);
	}
      break;

    case CONST:
      /* Compute <address> + 4 for the high order load.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%0, %1", operands);
      output_asm_insn ("ldr\t%H0, %2", operands);
      break;

    case PLUS:
      arg1 = XEXP (addr, 0);
      arg2 = XEXP (addr, 1);

      if (CONSTANT_P (arg1))
	base = arg2, offset = arg1;
      else
	base = arg1, offset = arg2;

      gcc_assert (GET_CODE (base) == REG);

      /* Catch the case of <address> = <reg> + <reg>.  */
      if (GET_CODE (offset) == REG)
	{
	  int reg_offset = REGNO (offset);
	  int reg_base = REGNO (base);
	  int reg_dest = REGNO (operands[0]);

	  /* Add the base and offset registers together into the
	     higher destination register.  */
	  asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
		       reg_dest + 1, reg_base, reg_offset);

	  /* Load the lower destination register from the address in
	     the higher destination register.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
		       reg_dest, reg_dest + 1);

	  /* Load the higher destination register from its own address
	     plus 4.  */
	  asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
		       reg_dest + 1, reg_dest + 1);
	}
      else
	{
	  /* Compute <address> + 4 for the high order load.  */
	  operands[2] = adjust_address (operands[1], SImode, 4);

	  /* If the computed address is held in the low order register
	     then load the high order register first, otherwise always
	     load the low order register first.  */
	  if (REGNO (operands[0]) == REGNO (base))
	    {
	      output_asm_insn ("ldr\t%H0, %2", operands);
	      output_asm_insn ("ldr\t%0, %1", operands);
	    }
	  else
	    {
	      output_asm_insn ("ldr\t%0, %1", operands);
	      output_asm_insn ("ldr\t%H0, %2", operands);
	    }
	}
      break;

    case LABEL_REF:
      /* With no registers to worry about we can just load the value
	 directly.  */
      operands[2] = adjust_address (operands[1], SImode, 4);

      output_asm_insn ("ldr\t%H0, %2", operands);
      output_asm_insn ("ldr\t%0, %1", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
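/* A sketch of the <reg> + <reg> case above (hypothetical registers): for a
   DImode load into r0 from the address r2 + r3 the function emits

	add	r1, r2, r3
	ldr	r0, [r1, #0]
	ldr	r1, [r1, #4]

   so the computed address is kept in the high destination register, where
   it survives the first load.  */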
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      if (REGNO (operands[5]) > REGNO (operands[6]))
	{
	  tmp = operands[5];
	  operands[5] = operands[6];
	  operands[6] = tmp;
	}
      if (REGNO (operands[4]) > REGNO (operands[5]))
	{
	  tmp = operands[4];
	  operands[4] = operands[5];
	  operands[5] = tmp;
	}
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
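/* For example, if operands[4] and operands[5] arrive as r5 and r4 in the
   n == 2 case, the swap above reorders them so that the register list is
   ascending, giving "ldmia %1!, {r4, r5}" / "stmia %0!, {r4, r5}".  */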
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
	thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
	cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
			    reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
					      plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
			    reg));
    }
}
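/* Worked example (hypothetical length): a 15-byte copy is decomposed by the
   code above into one 12-byte movmem12b block, after which len == 3 leaves
   one HImode move at offset 0 and one QImode move at offset 2.  */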
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
static void
arm_file_start (void)
{
  int val;

  if (TARGET_UNIFIED_ASM)
    asm_fprintf (asm_out_file, "\t.syntax unified\n");

  if (TARGET_BPABI)
    {
      const char *fpu_name;
      if (arm_selected_arch)
	asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
      else
	asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);

      if (TARGET_SOFT_FLOAT)
	{
	  if (TARGET_VFP)
	    fpu_name = "softvfp";
	  else
	    fpu_name = "softfpa";
	}
      else
	{
	  fpu_name = arm_fpu_desc->name;
	  if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
	    {
	      if (TARGET_HARD_FLOAT)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
	      if (TARGET_HARD_FLOAT_ABI)
		asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
	    }
	}
      asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);

      /* Some of these attributes only apply when the corresponding features
	 are used.  However we don't have any easy way of figuring this out.
	 Conservatively record the setting that would have been used.  */

      /* Tag_ABI_FP_rounding.  */
      if (flag_rounding_math)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
      if (!flag_unsafe_math_optimizations)
	{
	  /* Tag_ABI_FP_denormal.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
	  /* Tag_ABI_FP_exceptions.  */
	  asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
	}
      /* Tag_ABI_FP_user_exceptions.  */
      if (flag_signaling_nans)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
      /* Tag_ABI_FP_number_model.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
		   flag_finite_math_only ? 1 : 3);

      /* Tag_ABI_align8_needed.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
      /* Tag_ABI_align8_preserved.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
      /* Tag_ABI_enum_size.  */
      asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
		   flag_short_enums ? 1 : 2);

      /* Tag_ABI_optimization_goals.  */
      if (optimize_size)
	val = 4;
      else if (optimize >= 2)
	val = 2;
      else if (optimize)
	val = 1;
      else
	val = 6;
      asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);

      /* Tag_ABI_FP_16bit_format.  */
      if (arm_fp16_format)
	asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n",
		     (int) arm_fp16_format);

      if (arm_lang_output_object_attributes_hook)
	arm_lang_output_object_attributes_hook();
    }
  default_file_start();
}
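/* For illustration (a sketch; the exact set depends on the options used):
   compiling with -O2 -fshort-enums for an EABI target begins the assembly
   file with directives along these lines:

	.fpu softvfp
	.eabi_attribute 23, 3
	.eabi_attribute 24, 1
	.eabi_attribute 25, 1
	.eabi_attribute 26, 1
	.eabi_attribute 30, 2
 */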
static void
arm_file_end (void)
{
  int regno;

  if (NEED_INDICATE_EXEC_STACK)
    /* Add .note.GNU-stack.  */
    file_end_indicate_exec_stack ();

  if (! thumb_call_reg_needed)
    return;

  switch_to_section (text_section);
  asm_fprintf (asm_out_file, "\t.code 16\n");
  ASM_OUTPUT_ALIGN (asm_out_file, 1);

  for (regno = 0; regno < LR_REGNUM; regno++)
    {
      rtx label = thumb_call_via_label[regno];

      if (label != 0)
	{
	  targetm.asm_out.internal_label (asm_out_file, "L",
					  CODE_LABEL_NUMBER (label));
	  asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
	}
    }
}
#ifndef ARM_PE
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   simplification.  */

static void
arm_encode_section_info (tree decl, rtx rtl, int first)
{
  if (optimize > 0 && TREE_CONSTANT (decl))
    SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;

  default_encode_section_info (decl, rtl, first);
}
#endif /* !ARM_PE */
static void
arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
  if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
      && !strcmp (prefix, "L"))
    {
      arm_ccfsm_state = 0;
      arm_target_insn = NULL;
    }
  default_internal_label (stream, prefix, labelno);
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		     HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
		     tree function)
{
  static int thunk_label = 0;
  char label[256];
  char labelpc[256];
  int mi_delta = delta;
  const char *const mi_op = mi_delta < 0 ? "sub" : "add";
  int shift = 0;
  int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
		    ? 1 : 0);
  if (mi_delta < 0)
    mi_delta = - mi_delta;

  if (TARGET_THUMB1)
    {
      int labelno = thunk_label++;
      ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
      /* Thunks are entered in arm mode when available.  */
      if (TARGET_THUMB1_ONLY)
	{
	  /* push r3 so we can use it as a temporary.  */
	  /* TODO: Omit this save if r3 is not used.  */
	  fputs ("\tpush {r3}\n", file);
	  fputs ("\tldr\tr3, ", file);
	}
      else
	{
	  fputs ("\tldr\tr12, ", file);
	}
      assemble_name (file, label);
      fputc ('\n', file);
      if (flag_pic)
	{
	  /* If we are generating PIC, the ldr instruction below loads
	     "(target - 7) - .LTHUNKPCn" into r12.  The pc reads as
	     the address of the add + 8, so we have:

	     r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
		 = target + 1.

	     Note that we have "+ 1" because some versions of GNU ld
	     don't set the low bit of the result for R_ARM_REL32
	     relocations against thumb function symbols.
	     On ARMv6M this is +4, not +8.  */
	  ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
	  assemble_name (file, labelpc);
	  fputs (":\n", file);
	  if (TARGET_THUMB1_ONLY)
	    {
	      /* This is 2 insns after the start of the thunk, so we know it
		 is 4-byte aligned.  */
	      fputs ("\tadd\tr3, pc, r3\n", file);
	      fputs ("\tmov r12, r3\n", file);
	    }
	  else
	    fputs ("\tadd\tr12, pc, r12\n", file);
	}
      else if (TARGET_THUMB1_ONLY)
	fputs ("\tmov r12, r3\n", file);
    }
  if (TARGET_THUMB1_ONLY)
    {
      if (mi_delta > 255)
	{
	  fputs ("\tldr\tr3, ", file);
	  assemble_name (file, label);
	  fputs ("+4\n", file);
	  asm_fprintf (file, "\t%s\t%r, %r, r3\n",
		       mi_op, this_regno, this_regno);
	}
      else if (mi_delta != 0)
	{
	  asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
		       mi_op, this_regno, this_regno,
		       mi_delta);
	}
    }
  else
    {
      /* TODO: Use movw/movt for large constants when available.  */
      while (mi_delta != 0)
	{
	  if ((mi_delta & (3 << shift)) == 0)
	    shift += 2;
	  else
	    {
	      asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
			   mi_op, this_regno, this_regno,
			   mi_delta & (0xff << shift));
	      mi_delta &= ~(0xff << shift);
	      shift += 8;
	    }
	}
    }
  if (TARGET_THUMB1)
    {
      if (TARGET_THUMB1_ONLY)
	fputs ("\tpop\t{r3}\n", file);

      fprintf (file, "\tbx\tr12\n");
      ASM_OUTPUT_ALIGN (file, 2);
      assemble_name (file, label);
      fputs (":\n", file);
      if (flag_pic)
	{
	  /* Output ".word .LTHUNKn-7-.LTHUNKPCn".  */
	  rtx tem = XEXP (DECL_RTL (function), 0);
	  tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
	  tem = gen_rtx_MINUS (GET_MODE (tem),
			       tem,
			       gen_rtx_SYMBOL_REF (Pmode,
						   ggc_strdup (labelpc)));
	  assemble_integer (tem, 4, BITS_PER_WORD, 1);
	}
      else
	/* Output ".word .LTHUNKn".  */
	assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);

      if (TARGET_THUMB1_ONLY && mi_delta > 255)
	assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
    }
  else
    {
      fputs ("\tb\t", file);
      assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
      if (NEED_PLT_RELOC)
	fputs ("(PLT)", file);
      fputc ('\n', file);
    }
}
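/* For a non-PIC, ARM-state thunk with delta == 4 the code above emits
   roughly (a sketch):

	add	r0, r0, #4
	b	<function>

   with r1 substituted for r0 when the return value is passed by reference
   (aggregate_value_p), and "(PLT)" appended when NEED_PLT_RELOC holds.  */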
int
arm_emit_vector_const (FILE *file, rtx x)
{
  int i;
  const char * pattern;

  gcc_assert (GET_CODE (x) == CONST_VECTOR);

  switch (GET_MODE (x))
    {
    case V2SImode: pattern = "%08x"; break;
    case V4HImode: pattern = "%04x"; break;
    case V8QImode: pattern = "%02x"; break;
    default:       gcc_unreachable ();
    }

  fprintf (file, "0x");
  for (i = CONST_VECTOR_NUNITS (x); i--;)
    {
      rtx element;

      element = CONST_VECTOR_ELT (x, i);
      fprintf (file, pattern, INTVAL (element));
    }

  return 1;
}
/* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
   HFmode constant pool entries are actually loaded with ldr.  */
void
arm_emit_fp16_const (rtx c)
{
  REAL_VALUE_TYPE r;
  long bits;

  REAL_VALUE_FROM_CONST_DOUBLE (r, c);
  bits = real_to_target (NULL, &r, HFmode);
  if (WORDS_BIG_ENDIAN)
    assemble_zeros (2);
  assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
  if (!WORDS_BIG_ENDIAN)
    assemble_zeros (2);
}
const char *
arm_output_load_gr (rtx *operands)
{
  rtx reg;
  rtx offset;
  rtx wcgr;
  rtx sum;

  if (GET_CODE (operands[1]) != MEM
      || GET_CODE (sum = XEXP (operands[1], 0)) != PLUS
      || GET_CODE (reg = XEXP (sum, 0)) != REG
      || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
      || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
    return "wldrw%?\t%0, %1";

  /* Fix up an out-of-range load of a GR register.  */
  output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
  wcgr = operands[0];
  operands[0] = reg;
  output_asm_insn ("ldr%?\t%0, %1", operands);

  operands[0] = wcgr;
  operands[1] = reg;
  output_asm_insn ("tmcr%?\t%0, %1", operands);
  output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);

  return "";
}
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.

   On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
   named arg and all anonymous args onto the stack.
   XXX I know the prologue shouldn't be pushing registers, but it is faster
   that way.  */

static void
arm_setup_incoming_varargs (CUMULATIVE_ARGS *pcum,
			    enum machine_mode mode,
			    tree type,
			    int *pretend_size,
			    int second_time ATTRIBUTE_UNUSED)
{
  int nregs;

  cfun->machine->uses_anonymous_args = 1;
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      nregs = pcum->aapcs_ncrn;
      if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
	nregs++;
    }
  else
    nregs = pcum->nregs;

  if (nregs < NUM_ARG_REGS)
    *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
}
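/* Example of the arithmetic above: if three of the four argument registers
   are already occupied (nregs == 3, NUM_ARG_REGS == 4), *pretend_size
   becomes (4 - 3) * UNITS_PER_WORD == 4, so the prologue pushes just r3.  */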
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address.  */
int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 0);

  return !reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.  */
int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  return !arm_no_early_store_addr_dep (producer, consumer);
}

/* Return nonzero if the CONSUMER instruction (a load) does need
   PRODUCER's value to calculate the address.  */
int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx addr = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (addr) == COND_EXEC)
    addr = COND_EXEC_CODE (addr);
  if (GET_CODE (addr) == PARALLEL)
    addr = XVECEXP (addr, 0, 0);
  addr = XEXP (addr, 1);

  return reg_overlap_mentioned_p (value, addr);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.  */
int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);
  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the whole shift
     operation.  */
  if (GET_CODE (early_op) == REG)
    early_op = op;

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.  */
int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);
  rtx early_op;

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  early_op = XEXP (op, 0);

  /* This is either an actual independent shift, or a shift applied to
     the first operand of another operation.  We want the value being
     shifted, in either case.  */
  if (GET_CODE (early_op) != REG)
    early_op = XEXP (early_op, 0);

  return !reg_overlap_mentioned_p (value, early_op);
}

/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.  */
int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx value = PATTERN (producer);
  rtx op = PATTERN (consumer);

  if (GET_CODE (value) == COND_EXEC)
    value = COND_EXEC_CODE (value);
  if (GET_CODE (value) == PARALLEL)
    value = XVECEXP (value, 0, 0);
  value = XEXP (value, 0);
  if (GET_CODE (op) == COND_EXEC)
    op = COND_EXEC_CODE (op);
  if (GET_CODE (op) == PARALLEL)
    op = XVECEXP (op, 0, 0);
  op = XEXP (op, 1);

  if (GET_CODE (op) == PLUS || GET_CODE (op) == MINUS)
    {
      if (GET_CODE (XEXP (op, 0)) == MULT)
	return !reg_overlap_mentioned_p (value, XEXP (op, 0));
      else
	return !reg_overlap_mentioned_p (value, XEXP (op, 1));
    }

  return 0;
}
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */

static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   int *punsignedp ATTRIBUTE_UNUSED,
			   const_tree fntype ATTRIBUTE_UNUSED,
			   int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */

static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}

/* AAPCS requires that anonymous bitfields affect structure alignment.  */

static bool
arm_align_anon_bitfield (void)
{
  return TARGET_AAPCS_BASED;
}

/* The generic C++ ABI says 64-bit (long long).  The EABI says 32-bit.  */

static tree
arm_cxx_guard_type (void)
{
  return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.  */
int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);
  rtx mul_result;
  rtx mac_op0, mac_op1, mac_acc;

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* Check that mul is of the form (set (...) (mult ...))
     and mla is of the form (set (...) (plus (mult ...) (...))).  */
  if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
      || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
	  || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
    return 0;

  mul_result = XEXP (mul, 0);
  mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
  mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
  mac_acc = XEXP (XEXP (mac, 1), 1);

  return (reg_overlap_mentioned_p (mul_result, mac_acc)
	  && !reg_overlap_mentioned_p (mul_result, mac_op0)
	  && !reg_overlap_mentioned_p (mul_result, mac_op1));
}
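/* Illustrative RTL (a sketch): the producer/consumer pair

	(set (reg 0) (mult (reg 1) (reg 2)))
	(set (reg 3) (plus (mult (reg 4) (reg 5)) (reg 0)))

   satisfies the test above, because the multiply result (reg 0) feeds only
   the accumulator operand of the multiply-accumulate.  */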
/* The EABI says test the least significant bit of a guard variable.  */

static bool
arm_cxx_guard_mask_bit (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI specifies that all array cookies are 8 bytes long.  */

static tree
arm_get_cookie_size (tree type)
{
  tree size;

  if (!TARGET_AAPCS_BASED)
    return default_cxx_get_cookie_size (type);

  size = build_int_cst (sizetype, 8);
  return size;
}

/* The EABI says that array cookies should also contain the element size.  */

static bool
arm_cookie_has_size (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says constructors and destructors should return a pointer to
   the object constructed/destroyed.  */

static bool
arm_cxx_cdtor_returns_this (void)
{
  return TARGET_AAPCS_BASED;
}

/* The EABI says that an inline function may never be the key
   method.  */

static bool
arm_cxx_key_method_may_be_inline (void)
{
  return !TARGET_AAPCS_BASED;
}

static void
arm_cxx_determine_class_data_visibility (tree decl)
{
  if (!TARGET_AAPCS_BASED
      || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    return;

  /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
     is exported.  However, on systems without dynamic vague linkage,
     \S 3.2.5.6 says that COMDAT class data has hidden linkage.  */
  if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
    DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
  else
    DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
  DECL_VISIBILITY_SPECIFIED (decl) = 1;
}

static bool
arm_cxx_class_data_always_comdat (void)
{
  /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
     vague linkage if the class has no key function.  */
  return !TARGET_AAPCS_BASED;
}

/* The EABI says __aeabi_atexit should be used to register static
   destructors.  */

static bool
arm_cxx_use_aeabi_atexit (void)
{
  return TARGET_AAPCS_BASED;
}
void
arm_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  rtx addr;
  unsigned long saved_regs;

  offsets = arm_get_frame_offsets ();
  saved_regs = offsets->saved_regs_mask;

  if ((saved_regs & (1 << LR_REGNUM)) == 0)
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
  else
    {
      if (frame_pointer_needed)
	addr = plus_constant(hard_frame_pointer_rtx, -4);
      else
	{
	  /* LR will be the first saved register.  */
	  delta = offsets->outgoing_args - (offsets->frame + 4);

	  if (delta >= 4096)
	    {
	      emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
				     GEN_INT (delta & ~4095)));
	      addr = scratch;
	      delta &= 4095;
	    }
	  else
	    addr = stack_pointer_rtx;

	  addr = plus_constant (addr, delta);
	}

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
}
void
thumb_set_return_address (rtx source, rtx scratch)
{
  arm_stack_offsets *offsets;
  HOST_WIDE_INT delta;
  HOST_WIDE_INT limit;
  int reg;
  rtx addr;
  unsigned long mask;

  emit_use (source);

  offsets = arm_get_frame_offsets ();
  mask = offsets->saved_regs_mask;
  if (mask & (1 << LR_REGNUM))
    {
      limit = 1024;
      /* Find the saved regs.  */
      if (frame_pointer_needed)
	{
	  delta = offsets->soft_frame - offsets->saved_args;
	  reg = THUMB_HARD_FRAME_POINTER_REGNUM;
	  limit = 128;
	}
      else
	{
	  delta = offsets->outgoing_args - offsets->saved_args;
	  reg = SP_REGNUM;
	}
      /* Allow for the stack frame.  */
      if (TARGET_THUMB1 && TARGET_BACKTRACE)
	delta -= 16;
      /* The link register is always the first saved register.  */
      delta -= 4;

      /* Construct the address.  */
      addr = gen_rtx_REG (SImode, reg);
      if (delta > limit)
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
	  addr = scratch;
	}
      else
	addr = plus_constant (addr, delta);

      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
}
/* Implements target hook vector_mode_supported_p.  */
bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
	  || (mode == V4HImode)
	  || (mode == V8QImode)))
    return true;

  return false;
}

/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
   registers when autovectorizing for Neon, at least until multiple vector
   widths are supported properly by the middle-end.  */

static enum machine_mode
arm_preferred_simd_mode (enum machine_mode mode)
{
  if (TARGET_NEON)
    switch (mode)
      {
      case SFmode:
	return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
      case SImode:
	return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
      case HImode:
	return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
      case QImode:
	return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
      case DImode:
	if (TARGET_NEON_VECTORIZE_QUAD)
	  return V2DImode;
	break;

      default:;
      }

  if (TARGET_REALLY_IWMMXT)
    switch (mode)
      {
      case SImode:
	return V2SImode;
      case HImode:
	return V4HImode;
      case QImode:
	return V8QImode;

      default:;
      }

  return word_mode;
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.

   We need to define this for LO_REGS on thumb.  Otherwise we can end up
   using r0-r4 for function arguments, r7 for the stack frame and don't
   have enough left over to do doubleword arithmetic.  */

static bool
arm_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_THUMB && rclass == LO_REGS)
      || rclass == CC_REG)
    return true;

  return false;
}

/* Implements target hook small_register_classes_for_mode_p.  */
bool
arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return TARGET_THUMB1;
}

/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */

static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
/* Map internal gcc register numbers to DWARF2 register numbers.  */
unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
	return 64 + regno - FIRST_VFP_REGNUM;
      else
	return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
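/* Summarizing the mapping implemented above: r0-r15 keep their own numbers,
   FPA registers map to 16-23 (96-103 for AAPCS), single-precision-capable
   VFP registers to 64 + N, the remaining D registers to 256 + N/2, and the
   iWMMXt register files to 104+ and 112+ respectively.  */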
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
static rtx
arm_dwarf_register_span (rtx rtl)
{
  unsigned regno;
  int nregs;
  int i;
  rtx p;

  regno = REGNO (rtl);
  if (!IS_VFP_REGNUM (regno))
    return NULL_RTX;

  /* XXX FIXME: The EABI defines two VFP register ranges:
	64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
	256-287: D0-D31
     The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
     corresponding D register.  Until GDB supports this, we shall use the
     legacy encodings.  We also use these encodings for D0-D15 for
     compatibility with older debuggers.  */
  if (VFP_REGNO_OK_FOR_SINGLE (regno))
    return NULL_RTX;

  nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
  p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
  regno = (regno - FIRST_VFP_REGNUM) / 2;
  for (i = 0; i < nregs; i++)
    XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);

  return p;
}
#if ARM_UNWIND_INFO
/* Emit unwind directives for a store-multiple instruction or stack pointer
   push during alignment.
   These should only ever be generated by the function prologue code, so
   expect them to have a particular form.  */

static void
arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
{
  int i;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT nregs;
  int reg_size;
  unsigned reg;
  unsigned lastreg;
  rtx e;

  e = XVECEXP (p, 0, 0);
  if (GET_CODE (e) != SET)
    abort ();

  /* First insn will adjust the stack pointer.  */
  if (GET_CODE (e) != SET
      || GET_CODE (XEXP (e, 0)) != REG
      || REGNO (XEXP (e, 0)) != SP_REGNUM
      || GET_CODE (XEXP (e, 1)) != PLUS)
    abort ();

  offset = -INTVAL (XEXP (XEXP (e, 1), 1));
  nregs = XVECLEN (p, 0) - 1;

  reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
  if (reg < 16)
    {
      /* The function prologue may also push pc, but not annotate it as it is
	 never restored.  We turn this into a stack pointer adjustment.  */
      if (nregs * 4 == offset - 4)
	{
	  fprintf (asm_out_file, "\t.pad #4\n");
	  offset -= 4;
	}
      reg_size = 4;
      fprintf (asm_out_file, "\t.save {");
    }
  else if (IS_VFP_REGNUM (reg))
    {
      reg_size = 8;
      fprintf (asm_out_file, "\t.vsave {");
    }
  else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
    {
      /* FPA registers are done differently.  */
      asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
      return;
    }
  else
    /* Unknown register type.  */
    abort ();

  /* If the stack increment doesn't match the size of the saved registers,
     something has gone horribly wrong.  */
  if (offset != nregs * reg_size)
    abort ();

  offset = 0;
  lastreg = 0;
  /* The remaining insns will describe the stores.  */
  for (i = 1; i <= nregs; i++)
    {
      /* Expect (set (mem <addr>) (reg)).
	 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)).  */
      e = XVECEXP (p, 0, i);
      if (GET_CODE (e) != SET
	  || GET_CODE (XEXP (e, 0)) != MEM
	  || GET_CODE (XEXP (e, 1)) != REG)
	abort ();

      reg = REGNO (XEXP (e, 1));
      if (reg < lastreg)
	abort ();

      if (i != 1)
	fprintf (asm_out_file, ", ");
      /* We can't use %r for vfp because we need to use the
	 double precision register names.  */
      if (IS_VFP_REGNUM (reg))
	asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf (asm_out_file, "%r", reg);

#ifdef ENABLE_CHECKING
      /* Check that the addresses are consecutive.  */
      e = XEXP (XEXP (e, 0), 0);
      if (GET_CODE (e) == PLUS)
	{
	  offset += reg_size;
	  if (GET_CODE (XEXP (e, 0)) != REG
	      || REGNO (XEXP (e, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e, 1)) != CONST_INT
	      || offset != INTVAL (XEXP (e, 1)))
	    abort ();
	}
      else if (i != 1
	       || GET_CODE (e) != REG
	       || REGNO (e) != SP_REGNUM)
	abort ();
#endif
    }
  fprintf (asm_out_file, "}\n");
}
/* Emit unwind directives for a SET.  */

static void
arm_unwind_emit_set (FILE * asm_out_file, rtx p)
{
  rtx e0;
  rtx e1;
  unsigned reg;

  e0 = XEXP (p, 0);
  e1 = XEXP (p, 1);
  switch (GET_CODE (e0))
    {
    case MEM:
      /* Pushing a single register.  */
      if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
	  || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
	  || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
	abort ();

      asm_fprintf (asm_out_file, "\t.save ");
      if (IS_VFP_REGNUM (REGNO (e1)))
	asm_fprintf(asm_out_file, "{d%d}\n",
		    (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
      else
	asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
      break;

    case REG:
      if (REGNO (e0) == SP_REGNUM)
	{
	  /* A stack increment.  */
	  if (GET_CODE (e1) != PLUS
	      || GET_CODE (XEXP (e1, 0)) != REG
	      || REGNO (XEXP (e1, 0)) != SP_REGNUM
	      || GET_CODE (XEXP (e1, 1)) != CONST_INT)
	    abort ();

	  asm_fprintf (asm_out_file, "\t.pad #%wd\n",
		       -INTVAL (XEXP (e1, 1)));
	}
      else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT offset;

	  if (GET_CODE (e1) == PLUS)
	    {
	      if (GET_CODE (XEXP (e1, 0)) != REG
		  || GET_CODE (XEXP (e1, 1)) != CONST_INT)
		abort ();
	      reg = REGNO (XEXP (e1, 0));
	      offset = INTVAL (XEXP (e1, 1));
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
			   HARD_FRAME_POINTER_REGNUM, reg,
			   offset);
	    }
	  else if (GET_CODE (e1) == REG)
	    {
	      reg = REGNO (e1);
	      asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
			   HARD_FRAME_POINTER_REGNUM, reg);
	    }
	  else
	    abort ();
	}
      else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
	{
	  /* Move from sp to reg.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
	}
      else if (GET_CODE (e1) == PLUS
	       && GET_CODE (XEXP (e1, 0)) == REG
	       && REGNO (XEXP (e1, 0)) == SP_REGNUM
	       && GET_CODE (XEXP (e1, 1)) == CONST_INT)
	{
	  /* Set reg to offset from sp.  */
	  asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
		       REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
	}
      else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
	{
	  /* Stack pointer save before alignment.  */
	  reg = REGNO (e0);
	  asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
		       reg + 0x90, reg);
	}
      else
	abort ();
      break;

    default:
      abort ();
    }
}
/* Emit unwind directives for the given insn.  */

static void
arm_unwind_emit (FILE * asm_out_file, rtx insn)
{
  rtx pat;

  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (!(flag_unwind_tables || crtl->uses_eh_lsda)
      && (TREE_NOTHROW (current_function_decl)
	  || crtl->all_throwers_are_sibcalls))
    return;

  if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
    return;

  pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
  if (pat)
    pat = XEXP (pat, 0);
  else
    pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      arm_unwind_emit_set (asm_out_file, pat);
      break;

    case SEQUENCE:
      /* Store multiple.  */
      arm_unwind_emit_sequence (asm_out_file, pat);
      break;

    default:
      abort ();
    }
}
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */

static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}

/* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */

static void
arm_asm_emit_except_personality (rtx personality)
{
  fputs ("\t.personality\t", asm_out_file);
  output_addr_const (asm_out_file, personality);
  fputc ('\n', asm_out_file);
}

/* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */

static void
arm_asm_init_sections (void)
{
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
#endif /* ARM_UNWIND_INFO */
/* Implement TARGET_EXCEPT_UNWIND_INFO.  */

static enum unwind_info_type
arm_except_unwind_info (struct gcc_options *opts)
{
  /* Honor the --enable-sjlj-exceptions configure switch.  */
#ifdef CONFIG_SJLJ_EXCEPTIONS
  if (CONFIG_SJLJ_EXCEPTIONS)
    return UI_SJLJ;
#endif

  /* If not using ARM EABI unwind tables... */
  if (ARM_UNWIND_INFO)
    {
      /* For simplicity elsewhere in this file, indicate that all unwind
	 info is disabled if we're not emitting unwind tables.  */
      if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
	return UI_NONE;
      else
	return UI_TARGET;
    }

  /* ... we use sjlj exceptions for backwards compatibility.  */
  return UI_SJLJ;
}
/* Handle UNSPEC DWARF call frame instructions.  These are needed for dynamic
   stack alignment.  */

static void
arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
{
  rtx unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);

  switch (index)
    {
    case UNSPEC_STACK_ALIGN:
      /* ??? We should set the CFA = (SP & ~7).  At this point we haven't
	 put anything on the stack, so hopefully it won't matter.
	 CFA = SP will be correct after alignment.  */
      dwarf2out_reg_save_reg (label, stack_pointer_rtx,
			      SET_DEST (pattern));
      break;
    default:
      gcc_unreachable ();
    }
}
/* Output unwind directives for the start/end of a function.  */

void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (arm_except_unwind_info (&global_options) != UI_TARGET)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
	 The same condition is used in arm_unwind_emit to suppress
	 the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
	  && (TREE_NOTHROW (current_function_decl)
	      || crtl->all_throwers_are_sibcalls))
	fputs("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
static bool
arm_emit_tls_decoration (FILE *fp, rtx x)
{
  enum tls_reloc reloc;
  rtx val;

  val = XVECEXP (x, 0, 0);
  reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));

  output_addr_const (fp, val);

  switch (reloc)
    {
    case TLS_GD32:
      fputs ("(tlsgd)", fp);
      break;
    case TLS_LDM32:
      fputs ("(tlsldm)", fp);
      break;
    case TLS_LDO32:
      fputs ("(tlsldo)", fp);
      break;
    case TLS_IE32:
      fputs ("(gottpoff)", fp);
      break;
    case TLS_LE32:
      fputs ("(tpoff)", fp);
      break;
    default:
      gcc_unreachable ();
    }

  switch (reloc)
    {
    case TLS_GD32:
    case TLS_LDM32:
    case TLS_IE32:
      fputs (" + (. - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 2));
      fputs (" - ", fp);
      output_addr_const (fp, XVECEXP (x, 0, 3));
      fputc (')', fp);
      break;
    default:
      break;
    }

  return TRUE;
}
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */

static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */

static bool
arm_output_addr_const_extra (FILE *fp, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return arm_emit_tls_decoration (fp, x);
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
    {
      char label[256];
      int labelno = INTVAL (XVECEXP (x, 0, 0));

      ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
      assemble_name_raw (fp, label);

      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
    {
      assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
    {
      output_addr_const (fp, XVECEXP (x, 0, 0));
      fputs ("-(", fp);
      output_addr_const (fp, XVECEXP (x, 0, 1));
      fputc (')', fp);
      return TRUE;
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    return arm_emit_vector_const (fp, x);

  return FALSE;
}
/* Output assembly for a shift instruction.
   SET_FLAGS determines how the instruction modifies the condition codes.
   0 - Do not set condition codes.
   1 - Set condition codes.
   2 - Use smallest instruction.  */
const char *
arm_output_shift (rtx * operands, int set_flags)
{
  char pattern[100];
  static const char flag_chars[3] = {'?', '.', '!'};
  const char *shift;
  HOST_WIDE_INT val;
  char c;

  c = flag_chars[set_flags];
  if (TARGET_UNIFIED_ASM)
    {
      shift = shift_op (operands[3], &val);
      if (shift)
	{
	  if (val != -1)
	    operands[2] = GEN_INT (val);
	  sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
	}
      else
	sprintf (pattern, "mov%%%c\t%%0, %%1", c);
    }
  else
    sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
  output_asm_insn (pattern, operands);
  return "";
}
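/* For example (a sketch): with unified syntax, a left shift by two and
   SET_FLAGS == 1 builds the pattern "lsl%.\t%0, %1, %2", which prints as
   something like "lsls r0, r1, #2" once the operands are substituted.  */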
/* Output a Thumb-1 casesi dispatch sequence.  */
const char *
thumb1_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[0]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
    case HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
	      "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
    case SImode:
      return "bl\t%___gnu_thumb1_case_si";
    default:
      gcc_unreachable ();
    }
}

/* Output a Thumb-2 casesi instruction.  */
const char *
thumb2_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  output_asm_insn ("cmp\t%0, %1", operands);
  output_asm_insn ("bhi\t%l3", operands);
  switch (GET_MODE(diff_vec))
    {
    case QImode:
      return "tbb\t[%|pc, %0]";
    case HImode:
      return "tbh\t[%|pc, %0, lsl #1]";
    case SImode:
      if (flag_pic)
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
	  output_asm_insn ("add\t%4, %4, %5", operands);
	  return "bx\t%4";
	}
      else
	{
	  output_asm_insn ("adr\t%4, %l2", operands);
	  return "ldr\t%|pc, [%4, %0, lsl #2]";
	}
    default:
      gcc_unreachable ();
    }
}
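/* A sketch of the Thumb-2 dispatch emitted above for a QImode table:

	cmp	r0, r1
	bhi	<default label>
	tbb	[pc, r0]

   where each byte table entry is a halfword-scaled offset from the table
   base.  */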
/* Most ARM cores are single issue, but some newer ones can dual issue.
   The scheduler descriptions rely on this being correct.  */
static int
arm_issue_rate (void)
{
  switch (arm_tune)
    {
    case cortexr4:
    case cortexr4f:
    case cortexa5:
    case cortexa8:
    case cortexa9:
      return 2;

    default:
      return 1;
    }
}
/* A table and a function to perform ARM-specific name mangling for
   NEON vector types in order to conform to the AAPCS (see "Procedure
   Call Standard for the ARM Architecture", Appendix A).  To qualify
   for emission with the mangled names defined in that document, a
   vector type must not only be of the correct mode but also be
   composed of NEON vector element types (e.g. __builtin_neon_qi).  */

typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *aapcs_name;
} arm_mangle_map_entry;

static arm_mangle_map_entry arm_mangle_map [] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_neon_qi",     "15__simd64_int8_t" },
  { V8QImode,  "__builtin_neon_uqi",    "16__simd64_uint8_t" },
  { V4HImode,  "__builtin_neon_hi",     "16__simd64_int16_t" },
  { V4HImode,  "__builtin_neon_uhi",    "17__simd64_uint16_t" },
  { V2SImode,  "__builtin_neon_si",     "16__simd64_int32_t" },
  { V2SImode,  "__builtin_neon_usi",    "17__simd64_uint32_t" },
  { V2SFmode,  "__builtin_neon_sf",     "18__simd64_float32_t" },
  { V8QImode,  "__builtin_neon_poly8",  "16__simd64_poly8_t" },
  { V4HImode,  "__builtin_neon_poly16", "17__simd64_poly16_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_neon_qi",     "16__simd128_int8_t" },
  { V16QImode, "__builtin_neon_uqi",    "17__simd128_uint8_t" },
  { V8HImode,  "__builtin_neon_hi",     "17__simd128_int16_t" },
  { V8HImode,  "__builtin_neon_uhi",    "18__simd128_uint16_t" },
  { V4SImode,  "__builtin_neon_si",     "17__simd128_int32_t" },
  { V4SImode,  "__builtin_neon_usi",    "18__simd128_uint32_t" },
  { V4SFmode,  "__builtin_neon_sf",     "19__simd128_float32_t" },
  { V16QImode, "__builtin_neon_poly8",  "17__simd128_poly8_t" },
  { V8HImode,  "__builtin_neon_poly16", "18__simd128_poly16_t" },
  { VOIDmode, NULL, NULL }
};
const char *
arm_mangle_type (const_tree type)
{
  arm_mangle_map_entry *pos = arm_mangle_map;

  /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
  if (TARGET_AAPCS_BASED
      && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    {
      static bool warned;
      if (!warned && warn_psabi && !in_system_header)
	{
	  warned = true;
	  inform (input_location,
		  "the mangling of %<va_list%> has changed in GCC 4.4");
	}
      return "St9__va_list";
    }

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  if (TREE_CODE (type) != VECTOR_TYPE)
    return NULL;

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  while (pos->mode != VOIDmode)
    {
      tree elt_type = TREE_TYPE (type);

      if (pos->mode == TYPE_MODE (type)
	  && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	  && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
		      pos->element_type_name))
	return pos->aapcs_name;

      pos++;
    }

  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
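/* Mangling example from the table above: a vector of eight
   __builtin_neon_qi elements (V8QImode, i.e. int8x8_t) mangles as
   "15__simd64_int8_t", while the half-precision type mangles as "Dh".  */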
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */

static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */

void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
	    sizeof (thumb_core_reg_alloc_order));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
	  || SUBTARGET_FRAME_POINTER_REQUIRED
	  || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}
/* Legitimize a memory reference for sync primitive implemented using
   ldrex / strex.  We currently force the form of the reference to be
   indirect without offset.  We do not yet support the indirect offset
   addressing supported by some ARM targets for these
   instructions.  */
static rtx
arm_legitimize_sync_memory (rtx memory)
{
  rtx addr = force_reg (Pmode, XEXP (memory, 0));
  rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);

  set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
  return legitimate_memory;
}
/* An instruction emitter.  */
typedef void (* emit_f) (int label, const char *, rtx *);

/* An instruction emitter that emits via the conventional
   output_asm_insn.  */
static void
arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
{
  output_asm_insn (pattern, operands);
}

/* Count the number of emitted synchronization instructions.  */
static unsigned arm_insn_count;

/* An emitter that counts emitted instructions but does not actually
   emit instructions into the instruction stream.  */
static void
arm_count (int label,
	   const char *pattern ATTRIBUTE_UNUSED,
	   rtx *operands ATTRIBUTE_UNUSED)
{
  if (! label)
    ++ arm_insn_count;
}
/* Construct a pattern using conventional output formatting and feed
   it to output_asm_insn.  Provides a mechanism to construct the
   output pattern on the fly.  Note the hard limit on the pattern
   buffer size.  */
static void ATTRIBUTE_PRINTF_4
arm_output_asm_insn (emit_f emit, int label, rtx *operands,
		     const char *pattern, ...)
{
  va_list ap;
  char buffer[256];

  va_start (ap, pattern);
  vsprintf (buffer, pattern, ap);
  va_end (ap);
  emit (label, buffer, operands);
}
/* Emit the memory barrier instruction, if any, provided by this
   target to a specified emitter.  */
static void
arm_process_output_memory_barrier (emit_f emit, rtx *operands)
{
  if (TARGET_HAVE_DMB)
    {
      /* Note we issue a system level barrier.  We should consider
	 issuing an inner shareability zone barrier here instead, ie.
	 "DMB ISH".  */
      emit (0, "dmb\tsy", operands);
      return;
    }

  if (TARGET_HAVE_DMB_MCR)
    {
      emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
      return;
    }

  gcc_unreachable ();
}

/* Emit the memory barrier instruction, if any, provided by this
   target.  */
const char *
arm_output_memory_barrier (rtx *operands)
{
  arm_process_output_memory_barrier (arm_emit, operands);
  return "";
}
/* Helper to figure out the instruction suffix required on ldrex/strex
   for operations on an object of the specified mode.  */
static const char *
arm_ldrex_suffix (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode: return "b";
    case HImode: return "h";
    case SImode: return "";
    case DImode: return "d";
    default:
      gcc_unreachable ();
    }
  return "";
}
/* Emit an ldrex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_ldrex (emit_f emit,
		  enum machine_mode mode,
		  rtx target,
		  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[2];

  operands[0] = target;
  operands[1] = memory;
  arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
}

/* Emit a strex{b,h,d, } instruction appropriate for the specified
   mode.  */
static void
arm_output_strex (emit_f emit,
		  enum machine_mode mode,
		  const char *cc,
		  rtx result,
		  rtx value,
		  rtx memory)
{
  const char *suffix = arm_ldrex_suffix (mode);
  rtx operands[3];

  operands[0] = result;
  operands[1] = value;
  operands[2] = memory;
  arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
		       cc);
}
/* Helper to emit a two operand instruction.  */
static void
arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
{
  rtx operands[2];

  operands[0] = d;
  operands[1] = s;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
}

/* Helper to emit a three operand instruction.  */
static void
arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
{
  rtx operands[3];

  operands[0] = d;
  operands[1] = a;
  operands[2] = b;
  arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
}
/* Emit a load store exclusive synchronization loop.

   do
     old_value = [mem]
     if old_value != required_value
       break;
     t1 = sync_op (old_value, new_value)
     [mem] = t1, t2 = [0|1]
   while ! t2

   Note:
     t1 == t2 is not permitted
     t1 == old_value is permitted

   required_value:
     RTX register or const_int representing the required old_value for
     the modify to continue, if NULL no comparison is performed.  */
static void
arm_output_sync_loop (emit_f emit,
		      enum machine_mode mode,
		      rtx old_value,
		      rtx memory,
		      rtx required_value,
		      rtx new_value,
		      rtx t1,
		      rtx t2,
		      enum attr_sync_op sync_op,
		      int early_barrier_required)
{
  rtx operands[1];

  gcc_assert (t1 != t2);

  if (early_barrier_required)
    arm_process_output_memory_barrier (emit, NULL);

  arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);

  arm_output_ldrex (emit, mode, old_value, memory);

  if (required_value)
    {
      rtx operands[2];

      operands[0] = old_value;
      operands[1] = required_value;
      arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
    }

  switch (sync_op)
    {
    case SYNC_OP_ADD:
      arm_output_op3 (emit, "add", t1, old_value, new_value);
      break;

    case SYNC_OP_SUB:
      arm_output_op3 (emit, "sub", t1, old_value, new_value);
      break;

    case SYNC_OP_IOR:
      arm_output_op3 (emit, "orr", t1, old_value, new_value);
      break;

    case SYNC_OP_XOR:
      arm_output_op3 (emit, "eor", t1, old_value, new_value);
      break;

    case SYNC_OP_AND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      break;

    case SYNC_OP_NAND:
      arm_output_op3 (emit, "and", t1, old_value, new_value);
      arm_output_op2 (emit, "mvn", t1, t1);
      break;

    case SYNC_OP_NONE:
      t1 = new_value;
      break;
    }

  if (t2)
    {
      arm_output_strex (emit, mode, "", t2, t1, memory);
      operands[0] = t2;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
			   LOCAL_LABEL_PREFIX);
    }
  else
    {
      /* Use old_value for the return value because for some operations
	 the old_value can easily be restored.  This saves one register.  */
      arm_output_strex (emit, mode, "", old_value, t1, memory);
      operands[0] = old_value;
      arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
      arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
			   LOCAL_LABEL_PREFIX);

      switch (sync_op)
	{
	case SYNC_OP_ADD:
	  arm_output_op3 (emit, "sub", old_value, t1, new_value);
	  break;

	case SYNC_OP_SUB:
	  arm_output_op3 (emit, "add", old_value, t1, new_value);
	  break;

	case SYNC_OP_XOR:
	  arm_output_op3 (emit, "eor", old_value, t1, new_value);
	  break;

	case SYNC_OP_NONE:
	  arm_output_op2 (emit, "mov", old_value, required_value);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  arm_process_output_memory_barrier (emit, NULL);
  arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
}
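/* For an SImode atomic add the loop above prints roughly the following
   (a sketch; the register numbers are chosen by the allocator):

	dmb	sy
   .LSYT:
	ldrex	r0, [r1]
	add	r2, r0, r3
	strex	r4, r2, [r1]
	teq	r4, #0
	bne	.LSYT
	dmb	sy
 */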
static rtx
arm_get_sync_operand (rtx *operands, int index, rtx default_value)
{
  if (index > 0)
    default_value = operands[index - 1];

  return default_value;
}

#define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
  arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
/* Extract the operands for a synchronization instruction from the
   instruction's attributes and emit the instruction.  */
static void
arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
{
  rtx result, memory, required_value, new_value, t1, t2;
  int early_barrier;
  enum machine_mode mode;
  enum attr_sync_op sync_op;

  result = FETCH_SYNC_OPERAND(result, 0);
  memory = FETCH_SYNC_OPERAND(memory, 0);
  required_value = FETCH_SYNC_OPERAND(required_value, 0);
  new_value = FETCH_SYNC_OPERAND(new_value, 0);
  t1 = FETCH_SYNC_OPERAND(t1, 0);
  t2 = FETCH_SYNC_OPERAND(t2, 0);
  early_barrier =
    get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
  sync_op = get_attr_sync_op (insn);
  mode = GET_MODE (memory);

  arm_output_sync_loop (emit, mode, result, memory, required_value,
			new_value, t1, t2, sync_op, early_barrier);
}

/* Emit a synchronization instruction loop.  */
const char *
arm_output_sync_insn (rtx insn, rtx *operands)
{
  arm_process_output_sync_insn (arm_emit, insn, operands);
  return "";
}
/* Count the number of machine instructions that will be emitted for a
   synchronization instruction.  Note that the emitter used does not
   emit instructions, it just counts instructions, being careful not
   to count labels.  */
unsigned int
arm_sync_loop_insns (rtx insn, rtx *operands)
{
  arm_insn_count = 0;
  arm_process_output_sync_insn (arm_count, insn, operands);
  return arm_insn_count;
}
/* Helper to call a target sync instruction generator, dealing with
   the variation in operands required by the different generators.  */
static rtx
arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
		    rtx memory, rtx required_value, rtx new_value)
{
  switch (generator->op)
    {
    case arm_sync_generator_omn:
      gcc_assert (! required_value);
      return generator->u.omn (old_value, memory, new_value);

    case arm_sync_generator_omrn:
      gcc_assert (required_value);
      return generator->u.omrn (old_value, memory, required_value, new_value);
    }

  return NULL;
}
/* Expand a synchronization loop.  The synchronization loop is expanded
   as an opaque block of instructions in order to ensure that we do
   not subsequently get extraneous memory accesses inserted within the
   critical region.  The exclusive access property of ldrex/strex is
   only guaranteed if there are no intervening memory accesses.  */
void
arm_expand_sync (enum machine_mode mode,
		 struct arm_sync_generator *generator,
		 rtx target, rtx memory, rtx required_value, rtx new_value)
{
  if (target == NULL)
    target = gen_reg_rtx (mode);

  memory = arm_legitimize_sync_memory (memory);
  if (mode != SImode)
    {
      rtx load_temp = gen_reg_rtx (SImode);

      if (required_value)
	required_value = convert_modes (SImode, mode, required_value, true);

      new_value = convert_modes (SImode, mode, new_value, true);
      emit_insn (arm_call_generator (generator, load_temp, memory,
				     required_value, new_value));
      emit_move_insn (target, gen_lowpart (mode, load_temp));
    }
  else
    emit_insn (arm_call_generator (generator, target, memory, required_value,
				   new_value));
}
static bool
arm_vector_alignment_reachable (const_tree type, bool is_packed)
{
  /* Vectors which aren't in packed structures will not be less aligned than
     the natural alignment of their element type, so this is safe.  */
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    return !is_packed;

  return default_builtin_vector_alignment_reachable (type, is_packed);
}

static bool
arm_builtin_support_vector_misalignment (enum machine_mode mode,
					 const_tree type, int misalignment,
					 bool is_packed)
{
  if (TARGET_NEON && !BYTES_BIG_ENDIAN)
    {
      HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);

      if (is_packed)
	return align == 1;

      /* If the misalignment is unknown, we should be able to handle the access
	 so long as it is not to a member of a packed data structure.  */
      if (misalignment == -1)
	return true;

      /* Return true if the misalignment is a multiple of the natural alignment
	 of the vector's element type.  This is probably always going to be
	 true in practice, since we've already established that this isn't a
	 packed access.  */
      return ((misalignment % align) == 0);
    }

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
static void
arm_conditional_register_usage (void)
{
  int regno;

  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
	   regno <= LAST_FPA_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
	 to use the HI regs, because of the overhead of
	 stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      if (TARGET_MAVERICK)
	{
	  for (regno = FIRST_FPA_REGNUM;
	       regno <= LAST_FPA_REGNUM; ++ regno)
	    fixed_regs[regno] = call_used_regs[regno] = 1;
	  for (regno = FIRST_CIRRUS_FP_REGNUM;
	       regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
	    }
	}
      if (TARGET_VFP)
	{
	  /* VFPv3 registers are disabled when earlier VFP
	     versions are selected due to the definition of
	     LAST_VFP_REGNUM.  */
	  for (regno = FIRST_VFP_REGNUM;
	       regno <= LAST_VFP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
		|| regno >= FIRST_VFP_REGNUM + 32;
	    }
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
	 and wCG1 as call-preserved registers.  The 2002/11/21
	 revision changed this so that all wCG registers are
	 scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
static reg_class_t
arm_preferred_rename_class (reg_class_t rclass)
{
  /* Thumb-2 instructions using LO_REGS may be smaller than instructions
     using GENERIC_REGS.  During register rename pass, we prefer LO_REGS,
     and code size can be reduced.  */
  if (TARGET_THUMB2 && rclass == GENERAL_REGS)
    return LO_REGS;
  else
    return NO_REGS;
}

#include "gt-arm.h"