/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, enum machine_mode);
static rtx thumb_legitimize_address (rtx, rtx, enum machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static enum machine_mode arm_promote_function_mode (const_tree,
						    enum machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value (enum machine_mode, const_rtx);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);
static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
				      const_tree);
static int aapcs_select_return_coproc (const_tree, const_tree);
#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
static void arm_encode_section_info (tree, rtx, int);

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
static bool arm_return_in_memory (const_tree, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_allocate_stack_slots_for_args (void);
static const char *arm_invalid_parameter_type (const_tree t);
static const char *arm_invalid_return_type (const_tree t);
static tree arm_promoted_type (const_tree t);
static tree arm_convert_to_type (tree type, tree expr);
static bool arm_scalar_mode_supported_p (enum machine_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx arm_pic_static_addr (rtx orig, rtx reg);
/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  arm_handle_pcs_attribute },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, arm_handle_isr_attribute },
  { "interrupt",    0, 1, false, false, false, arm_handle_isr_attribute },
  { "naked",        0, 0, true,  false, false, arm_handle_fndecl_attribute },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.  */
  { "dllimport",    0, 0, true,  false, false, NULL },
  { "dllexport",    0, 0, true,  false, false, NULL },
  { "interfacearm", 0, 0, true,  false, false, arm_handle_fndecl_attribute },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, handle_dll_attribute },
  { "dllexport",    0, 0, false, false, false, handle_dll_attribute },
  { "notshared",    0, 0, false, true,  false, arm_handle_notshared_attribute },
#endif
  { NULL,           0, 0, false, false, false, NULL }
};
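/* For illustration only: in user code the attributes above are typically
   written as

     void far_func (void) __attribute__ ((long_call));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     void startup (void) __attribute__ ((naked));

   where "IRQ" is one of the argument strings accepted by
   arm_handle_isr_attribute.  */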
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef TARGET_HELP
#define TARGET_HELP arm_target_help
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost
#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall_thunk_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088
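/* (In other words the anchored block spans 4088 + 1 + 4095 = 8184 bytes,
   and 8184 = 8 * 1023, so anchors stay naturally aligned.)  */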
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_INVALID_PARAMETER_TYPE
#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type

#undef TARGET_INVALID_RETURN_TYPE
#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_CONVERT_TO_TYPE
#define TARGET_CONVERT_TO_TYPE arm_convert_to_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point hardware to schedule for.  */
/* Which floating point hardware to use.  */
const struct arm_fpu_desc *arm_fpu_desc;
/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which __fp16 format to use.  */
enum arm_fp16_format_type arm_fp16_format;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
					 profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */
#define FL_ARCH7EM    (1 << 21)       /* Instructions present in the ARMv7E-M
					 architecture.  */
#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	(FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
#define FL_FOR_ARCH7EM	(FL_FOR_ARCH7M | FL_ARCH7EM)
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;
/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;
/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */

/* Nonzero if this chip supports the ARM 6K extensions.  */

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if generating Thumb instructions.  */
/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;
/* Nonzero if chip supports Thumb 2.  */

/* Nonzero if chip supports integer division instruction.  */

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

static enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */

/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;
/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM)    \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */
struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
const struct tune_params arm_slowmul_tune =
  arm_slowmul_rtx_costs,

const struct tune_params arm_fastmul_tune =
  arm_fastmul_rtx_costs,

const struct tune_params arm_xscale_tune =
  arm_xscale_rtx_costs,

const struct tune_params arm_9e_tune =
/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
#include "arm-cores.def"
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify tuning costs here as it will be figured out
     from the core.  */

  {"armv2",    arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",    arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",   arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",    arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",   arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",    arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",   arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",   arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te",  arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",    arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",   arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",   mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",   arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk",  arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2",  arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv6-m",  cortexm1,   "6M",  FL_FOR_ARCH6M, NULL},
  {"armv7",    cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a",  cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r",  cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m",  cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"armv7e-m", cortexm3,   "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
  {"ep9312",   ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",   iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {"iwmmxt2",  iwmmxt2,    "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};
/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string	name		processors  */
  { NULL,	"-mcpu=",	all_cores  },
  { NULL,	"-march=",	all_architectures },
  { NULL,	"-mtune=",	all_cores }
};
/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2
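/* A sketch of the flow (see arm_handle_option and arm_override_options
   below): an option such as -mcpu=NAME stores NAME into
   arm_select[ARM_OPT_SET_CPU].string; arm_override_options then looks
   NAME up in the table named by the entry's "processors" field
   (all_cores for -mcpu=/-mtune=, all_architectures for -march=) to
   derive insn_flags and the tuning target.  */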
/* The name of the preprocessor macro to define for this architecture.  */
char arm_arch_name[] = "__ARM_ARCH_0UNK__";
/* Available values for -mfpu=.  */
static const struct arm_fpu_desc all_fpus[] =
{
  {"fpa",		ARM_FP_MODEL_FPA,      0, VFP_NONE,       false, false},
  {"fpe2",		ARM_FP_MODEL_FPA,      2, VFP_NONE,       false, false},
  {"fpe3",		ARM_FP_MODEL_FPA,      3, VFP_NONE,       false, false},
  {"maverick",		ARM_FP_MODEL_MAVERICK, 0, VFP_NONE,       false, false},
  {"vfp",		ARM_FP_MODEL_VFP,      2, VFP_REG_D16,    false, false},
  {"vfpv3",		ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
  {"vfpv3-fp16",	ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, true},
  {"vfpv3-d16",		ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, false},
  {"vfpv3-d16-fp16",	ARM_FP_MODEL_VFP,      3, VFP_REG_D16,    false, true},
  {"vfpv3xd",		ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, false},
  {"vfpv3xd-fp16",	ARM_FP_MODEL_VFP,      3, VFP_REG_SINGLE, false, true},
  {"neon",		ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  false},
  {"neon-fp16",		ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    true,  true},
  {"vfpv4",		ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    false, true},
  {"vfpv4-d16",		ARM_FP_MODEL_VFP,      4, VFP_REG_D16,    false, true},
  {"fpv4-sp-d16",	ARM_FP_MODEL_VFP,      4, VFP_REG_SINGLE, false, true},
  {"neon-vfpv4",	ARM_FP_MODEL_VFP,      4, VFP_REG_D32,    true,  true},
  /* Compatibility aliases.  */
  {"vfp3",		ARM_FP_MODEL_VFP,      3, VFP_REG_D32,    false, false},
};
struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};
/* Available values for -mfloat-abi=.  */
static const struct float_abi all_float_abis[] =
{
  {"soft",	ARM_FLOAT_ABI_SOFT},
  {"softfp",	ARM_FLOAT_ABI_SOFTFP},
  {"hard",	ARM_FLOAT_ABI_HARD}
};
struct fp16_format
{
  const char *name;
  enum arm_fp16_format_type fp16_format_type;
};
/* Available values for -mfp16-format=.  */
static const struct fp16_format all_fp16_formats[] =
{
  {"none",		ARM_FP16_FORMAT_NONE},
  {"ieee",		ARM_FP16_FORMAT_IEEE},
  {"alternative",	ARM_FP16_FORMAT_ALTERNATIVE}
};
struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};
/* Available values for -mabi=.  */
static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",		ARM_ABI_APCS},
  {"atpcs",		ARM_ABI_ATPCS},
  {"aapcs",		ARM_ABI_AAPCS},
  {"iwmmxt",		ARM_ABI_IWMMXT},
  {"aapcs-linux",	ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */
/* The maximum number of insns to be used when loading a constant.  */
static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
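/* Worked example of the clearing trick: 0b101100 -> 0b101000 -> 0b100000
   -> 0, so the loop body runs exactly once per set bit and COUNT ends up
   as the population count.  */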
/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     AAPCS based ABI.  */
  if (!TARGET_AAPCS_BASED)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */
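  /* For example, an HFmode addition is performed by widening both operands
     with the extension helper registered below, adding in SFmode, and
     narrowing the result with the truncation helper; only the two
     conversion libfuncs need real implementations.  */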
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));
      set_optab_libfunc (add_optab, HFmode, NULL);
      set_optab_libfunc (sdiv_optab, HFmode, NULL);
      set_optab_libfunc (smul_optab, HFmode, NULL);
      set_optab_libfunc (neg_optab, HFmode, NULL);
      set_optab_libfunc (sub_optab, HFmode, NULL);

      set_optab_libfunc (eq_optab, HFmode, NULL);
      set_optab_libfunc (ne_optab, HFmode, NULL);
      set_optab_libfunc (lt_optab, HFmode, NULL);
      set_optab_libfunc (le_optab, HFmode, NULL);
      set_optab_libfunc (ge_optab, HFmode, NULL);
      set_optab_libfunc (gt_optab, HFmode, NULL);
      set_optab_libfunc (unord_optab, HFmode, NULL);
      break;

    default:
      break;
    }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined by the ABI; the C Library ABI further reinforces this
     definition in its stdarg section.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
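  /* Expressed in C, the type built below is (a sketch inferred from the
     construction code that follows):

       struct __va_list
       {
	 void *__ap;
       };  */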
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     __va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}
/* Implement TARGET_HANDLE_OPTION.  */
static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}
arm_target_help (void)
  static int columns = 0;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
      GET_ENVIRONMENT (p, "COLUMNS");
	  int value = atoi (p);

	/* Use a reasonable default.  */

  printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;

  printf (" %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
	  printf (", %s", all_cores[i].name);
	  remaining -= len + 2;

	  printf ("\n %s", all_cores[i].name);
	  remaining = columns - (len + 4);

  printf ("\n\n Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;

  printf (" %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
	  printf (", %s", all_architectures[i].name);
	  remaining -= len + 2;

	  printf ("\n %s", all_architectures[i].name);
	  remaining = columns - (len + 4);
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
arm_override_options (void)
  enum processor_type target_arch_cpu = arm_none;
  enum processor_type selected_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
	  const struct processors * sel;

	  for (sel = ptr->processors; sel->name != NULL; sel++)
	    if (streq (ptr->string, sel->name))
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i == ARM_OPT_SET_CPU)
		  selected_cpu = (enum processor_type) (sel - ptr->processors);
		if (i != ARM_OPT_SET_TUNE)
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;

	  if (sel->name == NULL)
	    error ("bad value (%s) for %s switch", ptr->string, ptr->name);
  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
      const struct processors * sel;
      unsigned int sought;

      selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT;
      if (selected_cpu == arm_none)
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT;
#endif
      /* Default to ARM6.  */
      if (selected_cpu == arm_none)
	selected_cpu = arm6;

      sel = &all_cores[selected_cpu];

      insn_flags = sel->flags;
      /* Now check to see if the user has specified some command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
      if (sought != 0 && ((sought & insn_flags) != sought))
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
= all_cores
; sel
->name
!= NULL
; sel
++)
1430 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1433 if (sel
->name
== NULL
)
1435 unsigned current_bit_count
= 0;
1436 const struct processors
* best_fit
= NULL
;
1438 /* Ideally we would like to issue an error message here
1439 saying that it was not possible to find a CPU compatible
1440 with the default CPU, but which also supports the command
1441 line options specified by the programmer, and so they
1442 ought to use the -mcpu=<name> command line option to
1443 override the default CPU type.
1445 If we cannot find a cpu that has both the
1446 characteristics of the default cpu and the given
1447 command line options we scan the array again looking
1448 for a best match. */
1449 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1450 if ((sel
->flags
& sought
) == sought
)
1454 count
= bit_count (sel
->flags
& insn_flags
);
1456 if (count
>= current_bit_count
)
1459 current_bit_count
= count
;
1463 gcc_assert (best_fit
);
1467 insn_flags
= sel
->flags
;
1469 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", sel
->arch
);
1470 arm_default_cpu
= (enum processor_type
) (sel
- all_cores
);
1471 if (arm_tune
== arm_none
)
1472 arm_tune
= arm_default_cpu
;
  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  current_tune = all_cores[(int)arm_tune].tune;
  if (target_fp16_format_name)
      for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
	  if (streq (all_fp16_formats[i].name, target_fp16_format_name))
	      arm_fp16_format = all_fp16_formats[i].fp16_format_type;

      if (i == ARRAY_SIZE (all_fp16_formats))
	error ("invalid __fp16 format option: -mfp16-format=%s",
	       target_fp16_format_name);

    arm_fp16_format = ARM_FP16_FORMAT_NONE;

  if (target_abi_name)
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	  if (streq (arm_all_abis[i].name, target_abi_name))
	      arm_abi = arm_all_abis[i].abi_type;

      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);

    arm_abi = ARM_DEFAULT_ABI;
  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;

  if (TARGET_APCS_FRAME && TARGET_THUMB)
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");
  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");
  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
  arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
      /* Thumb-1 LDR instructions cannot have negative offsets.
	 Permissible positive offset ranges are 5-bit (for byte loads),
	 6-bit (for halfword loads), or 7-bit (for word loads).
	 Empirical results suggest a 7-bit anchor range gives the best
	 overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
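      /* (A 7-bit byte offset covers 0..127, hence the limit of 127 above.)  */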
  else if (TARGET_THUMB2)
      /* The minimum is set such that the total size of the block
	 for a particular anchor is 248 + 1 + 4095 bytes, which is
	 divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  if (target_fpu_name == NULL && target_fpe_name != NULL)
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
1650 error ("invalid floating point emulation option: -mfpe=%s",
1654 if (target_fpu_name
== NULL
)
1656 #ifdef FPUTYPE_DEFAULT
1657 target_fpu_name
= FPUTYPE_DEFAULT
;
1659 if (arm_arch_cirrus
)
1660 target_fpu_name
= "maverick";
1662 target_fpu_name
= "fpe2";
1666 arm_fpu_desc
= NULL
;
1667 for (i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
1669 if (streq (all_fpus
[i
].name
, target_fpu_name
))
1671 arm_fpu_desc
= &all_fpus
[i
];
1678 error ("invalid floating point option: -mfpu=%s", target_fpu_name
);
  switch (arm_fpu_desc->model)
    case ARM_FP_MODEL_FPA:
      if (arm_fpu_desc->rev == 2)
	arm_fpu_attr = FPU_FPE2;
      else if (arm_fpu_desc->rev == 3)
	arm_fpu_attr = FPU_FPE3;
	arm_fpu_attr = FPU_FPA;

    case ARM_FP_MODEL_MAVERICK:
      arm_fpu_attr = FPU_MAVERICK;

    case ARM_FP_MODEL_VFP:
      arm_fpu_attr = FPU_VFP;
  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
  if (TARGET_AAPCS_BASED
      && (arm_fpu_desc->model == ARM_FP_MODEL_FPA))
    error ("FPA is unsupported in the AAPCS");

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support -mcaller-super-interworking");
      else if (TARGET_CALLEE_INTERWORKING)
        error ("AAPCS does not support -mcallee-super-interworking");
    }
  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (arm_float_abi == ARM_FLOAT_ABI_HARD
               && TARGET_HARD_FLOAT
               && TARGET_VFP)
        arm_pcs_default = ARM_PCS_AAPCS_VFP;
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
        sorry ("-mfloat-abi=hard and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || (TARGET_FPA && arm_fpu_desc->rev))
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;
  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
        target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
        target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
        target_thread_pointer = TP_CP15;
      else
        error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");
  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
          || (ARM_DOUBLEWORD_ALIGN && size == 64))
        arm_structure_size_boundary = size;
      else
        warning (0, "structure size boundary can only be set to %s",
                 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }
  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    error ("RTP PIC is incompatible with Thumb");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }
  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (selected_cpu == cortexm3)
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }

  if (TARGET_THUMB1 && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }
  if (optimize_size)
    {
      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "-freorder-blocks-and-partition not supported on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }
  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
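/* Example of the user-level syntax this table serves (illustrative only):

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is matched against the entries above; when the
   attribute is given without an argument the handler is treated as an
   IRQ handler.  */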
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

static bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}
/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.  */

static void
arm_asm_trampoline_template (FILE *f)
{
  if (TARGET_ARM)
    {
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
  emit_move_insn (mem, chain_value);

  mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (a_tramp, TRAMPOLINE_SIZE), Pmode);
}

/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  if (TARGET_THUMB)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}
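/* For instance, if the trampoline for a nested function is at address
   0x8000 and the nested function is compiled as Thumb code, the address
   handed out to callers becomes 0x8001; the set low bit makes a BX/BLX to
   it enter Thumb state.  */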
/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
         is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
                                 && stack_adjust == 4)))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
         the default abi) ... */
      if (!call_used_regs[3])
        return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
        return 0;

      /* ... or for a tail-call argument ... */
      if (sibling)
        {
          gcc_assert (GET_CODE (sibling) == CALL_INSN);

          if (find_regno_fusage (sibling, USE, 3))
            return 0;
        }

      /* ... and that there are no call-saved registers in r0-r2
         (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
        return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
        return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
        return 0;

  return 1;
}
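/* As an illustration, a function whose only saved registers are {r4, lr}
   and which needs no stack adjustment passes all the tests above and can
   return with the single instruction

       ldmfd	sp!, {r4, pc}

   whereas any of the failing cases force a multi-instruction epilogue.  */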
/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
          && ((i & ~0xc000003f) == 0
              || (i & ~0xf000000f) == 0
              || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else
    {
      HOST_WIDE_INT v;

      /* Allow repeated pattern.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;
    }

  return FALSE;
}
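/* Worked example (ARM mode): an immediate operand is an 8-bit value
   rotated right by an even amount, so 0x0000ff00 (0xff rotated right by
   24) and 0xf000000f (0xff rotated right by 4) are accepted, while
   0x00000102 is rejected because its set bits cannot be covered by an
   8-bit value at an even rotation.  */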
/* Return true if I is a valid constant for the operation CODE.  */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case PLUS:
    case COMPARE: case EQ: case NE:
    case GT: case LE: case LT: case GE:
    case GEU: case LTU: case GTU: case LEU:
    case UNORDERED: case ORDERED:
    case UNEQ: case UNGE: case UNLT: case UNGT: case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
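/* For example, (plus reg -1) cannot encode -1 directly as an ARM
   immediate, but 1 is encodable, so the negated form is accepted here and
   the operation can be emitted as a SUB; likewise an AND whose
   complemented mask is encodable can become a BIC.  */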
/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are dyadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}
/* Return the number of instructions required to synthesize the given
   constant, if we start emitting them from bit-position I.  */

static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
  HOST_WIDE_INT temp1;
  int step_size = TARGET_ARM ? 2 : 1;
  int num_insns = 0;

  gcc_assert (TARGET_ARM || i == 0);

  do
    {
      int end;

      if (i <= 0)
	i += 32;
      if (remainder & (((1 << step_size) - 1) << (i - step_size)))
	{
	  end = i - 8;
	  if (end < 0)
	    end += 32;
	  temp1 = remainder & ((0x0ff << end)
			       | ((i < end) ? (0xff >> (32 - end)) : 0));
	  remainder &= ~temp1;
	  num_insns++;
	  i -= 8 - step_size;
	}
      i -= step_size;
    } while (remainder);
  return num_insns;
}
static int
find_best_start (unsigned HOST_WIDE_INT remainder)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom.  */
  if (! TARGET_ARM)
    return 0;

  for (i = 0; i < 32; i += 2)
    {
      int consecutive_zeros = 0;

      if (!(remainder & (3 << i)))
	{
	  while ((i < 32) && !(remainder & (3 << i)))
	    {
	      consecutive_zeros += 2;
	      i += 2;
	    }
	  if (consecutive_zeros > best_consecutive_zeros)
	    {
	      best_consecutive_zeros = consecutive_zeros;
	      best_start = i - consecutive_zeros;
	    }
	  i -= 2;
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  if (best_start != 0
      && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
      && (count_insns_for_constant (remainder, 0) <=
	  count_insns_for_constant (remainder, best_start)))
    best_start = 0;

  return best_start;
}
/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
2531 /* As above, but extra parameter GENERATE which, if clear, suppresses
2533 /* ??? This needs more work for thumb2. */
2536 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
2537 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
2542 int final_invert
= 0;
2543 int can_negate_initial
= 0;
2545 int num_bits_set
= 0;
2546 int set_sign_bit_copies
= 0;
2547 int clear_sign_bit_copies
= 0;
2548 int clear_zero_bit_copies
= 0;
2549 int set_zero_bit_copies
= 0;
2551 unsigned HOST_WIDE_INT temp1
, temp2
;
2552 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
2553 int step_size
= TARGET_ARM
? 2 : 1;
2555 /* Find out which operations are safe for a given CODE. Also do a quick
2556 check for degenerate cases; these can occur when DImode operations
2567 can_negate_initial
= 1;
2571 if (remainder
== 0xffffffff)
2574 emit_constant_insn (cond
,
2575 gen_rtx_SET (VOIDmode
, target
,
2576 GEN_INT (ARM_SIGN_EXTEND (val
))));
2582 if (reload_completed
&& rtx_equal_p (target
, source
))
2586 emit_constant_insn (cond
,
2587 gen_rtx_SET (VOIDmode
, target
, source
));
2599 emit_constant_insn (cond
,
2600 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2603 if (remainder
== 0xffffffff)
2605 if (reload_completed
&& rtx_equal_p (target
, source
))
2608 emit_constant_insn (cond
,
2609 gen_rtx_SET (VOIDmode
, target
, source
));
2618 if (reload_completed
&& rtx_equal_p (target
, source
))
2621 emit_constant_insn (cond
,
2622 gen_rtx_SET (VOIDmode
, target
, source
));
2626 if (remainder
== 0xffffffff)
2629 emit_constant_insn (cond
,
2630 gen_rtx_SET (VOIDmode
, target
,
2631 gen_rtx_NOT (mode
, source
)));
2637 /* We treat MINUS as (val - source), since (source - val) is always
2638 passed as (source + (-val)). */
2642 emit_constant_insn (cond
,
2643 gen_rtx_SET (VOIDmode
, target
,
2644 gen_rtx_NEG (mode
, source
)));
2647 if (const_ok_for_arm (val
))
2650 emit_constant_insn (cond
,
2651 gen_rtx_SET (VOIDmode
, target
,
2652 gen_rtx_MINUS (mode
, GEN_INT (val
),
2664 /* If we can do it in one insn get out quickly. */
2665 if (const_ok_for_arm (val
)
2666 || (can_negate_initial
&& const_ok_for_arm (-val
))
2667 || (can_invert
&& const_ok_for_arm (~val
)))
2670 emit_constant_insn (cond
,
2671 gen_rtx_SET (VOIDmode
, target
,
2673 ? gen_rtx_fmt_ee (code
, mode
, source
,
2679 /* Calculate a few attributes that may be useful for specific
2681 /* Count number of leading zeros. */
2682 for (i
= 31; i
>= 0; i
--)
2684 if ((remainder
& (1 << i
)) == 0)
2685 clear_sign_bit_copies
++;
2690 /* Count number of leading 1's. */
2691 for (i
= 31; i
>= 0; i
--)
2693 if ((remainder
& (1 << i
)) != 0)
2694 set_sign_bit_copies
++;
2699 /* Count number of trailing zero's. */
2700 for (i
= 0; i
<= 31; i
++)
2702 if ((remainder
& (1 << i
)) == 0)
2703 clear_zero_bit_copies
++;
2708 /* Count number of trailing 1's. */
2709 for (i
= 0; i
<= 31; i
++)
2711 if ((remainder
& (1 << i
)) != 0)
2712 set_zero_bit_copies
++;
2720 /* See if we can use movw. */
2721 if (arm_arch_thumb2
&& (remainder
& 0xffff0000) == 0)
2724 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
2729 /* See if we can do this by sign_extending a constant that is known
2730 to be negative. This is a good, way of doing it, since the shift
2731 may well merge into a subsequent insn. */
2732 if (set_sign_bit_copies
> 1)
2734 if (const_ok_for_arm
2735 (temp1
= ARM_SIGN_EXTEND (remainder
2736 << (set_sign_bit_copies
- 1))))
2740 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2741 emit_constant_insn (cond
,
2742 gen_rtx_SET (VOIDmode
, new_src
,
2744 emit_constant_insn (cond
,
2745 gen_ashrsi3 (target
, new_src
,
2746 GEN_INT (set_sign_bit_copies
- 1)));
2750 /* For an inverted constant, we will need to set the low bits,
2751 these will be shifted out of harm's way. */
2752 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2753 if (const_ok_for_arm (~temp1
))
2757 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2758 emit_constant_insn (cond
,
2759 gen_rtx_SET (VOIDmode
, new_src
,
2761 emit_constant_insn (cond
,
2762 gen_ashrsi3 (target
, new_src
,
2763 GEN_INT (set_sign_bit_copies
- 1)));
2769 /* See if we can calculate the value as the difference between two
2770 valid immediates. */
2771 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2773 int topshift
= clear_sign_bit_copies
& ~1;
2775 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2776 & (0xff000000 >> topshift
));
2778 /* If temp1 is zero, then that means the 9 most significant
2779 bits of remainder were 1 and we've caused it to overflow.
2780 When topshift is 0 we don't need to do anything since we
2781 can borrow from 'bit 32'. */
2782 if (temp1
== 0 && topshift
!= 0)
2783 temp1
= 0x80000000 >> (topshift
- 1);
2785 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2787 if (const_ok_for_arm (temp2
))
2791 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2792 emit_constant_insn (cond
,
2793 gen_rtx_SET (VOIDmode
, new_src
,
2795 emit_constant_insn (cond
,
2796 gen_addsi3 (target
, new_src
,
2804 /* See if we can generate this by setting the bottom (or the top)
2805 16 bits, and then shifting these into the other half of the
2806 word. We only look for the simplest cases, to do more would cost
2807 too much. Be careful, however, not to generate this when the
2808 alternative would take fewer insns. */
2809 if (val
& 0xffff0000)
2811 temp1
= remainder
& 0xffff0000;
2812 temp2
= remainder
& 0x0000ffff;
2814 /* Overlaps outside this range are best done using other methods. */
2815 for (i
= 9; i
< 24; i
++)
2817 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2818 && !const_ok_for_arm (temp2
))
2820 rtx new_src
= (subtargets
2821 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2823 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2824 source
, subtargets
, generate
);
2832 gen_rtx_ASHIFT (mode
, source
,
2839 /* Don't duplicate cases already considered. */
2840 for (i
= 17; i
< 24; i
++)
2842 if (((temp1
| (temp1
>> i
)) == remainder
)
2843 && !const_ok_for_arm (temp1
))
2845 rtx new_src
= (subtargets
2846 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2848 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2849 source
, subtargets
, generate
);
2854 gen_rtx_SET (VOIDmode
, target
,
2857 gen_rtx_LSHIFTRT (mode
, source
,
2868 /* If we have IOR or XOR, and the constant can be loaded in a
2869 single instruction, and we can find a temporary to put it in,
2870 then this can be done in two instructions instead of 3-4. */
2872 /* TARGET can't be NULL if SUBTARGETS is 0 */
2873 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2875 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2879 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2881 emit_constant_insn (cond
,
2882 gen_rtx_SET (VOIDmode
, sub
,
2884 emit_constant_insn (cond
,
2885 gen_rtx_SET (VOIDmode
, target
,
2886 gen_rtx_fmt_ee (code
, mode
,
2897 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
2898 and the remainder 0s for e.g. 0xfff00000)
2899 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
2901 This can be done in 2 instructions by using shifts with mov or mvn.
2906 mvn r0, r0, lsr #12 */
2907 if (set_sign_bit_copies
> 8
2908 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2912 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2913 rtx shift
= GEN_INT (set_sign_bit_copies
);
2917 gen_rtx_SET (VOIDmode
, sub
,
2919 gen_rtx_ASHIFT (mode
,
2924 gen_rtx_SET (VOIDmode
, target
,
2926 gen_rtx_LSHIFTRT (mode
, sub
,
2933 x = y | constant (which has set_zero_bit_copies number of trailing ones).
2935 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
2937 For eg. r0 = r0 | 0xfff
2942 if (set_zero_bit_copies
> 8
2943 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
2947 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2948 rtx shift
= GEN_INT (set_zero_bit_copies
);
2952 gen_rtx_SET (VOIDmode
, sub
,
2954 gen_rtx_LSHIFTRT (mode
,
2959 gen_rtx_SET (VOIDmode
, target
,
2961 gen_rtx_ASHIFT (mode
, sub
,
2967 /* This will never be reached for Thumb2 because orn is a valid
2968 instruction. This is for Thumb1 and the ARM 32 bit cases.
2970 x = y | constant (such that ~constant is a valid constant)
2972 x = ~(~y & ~constant).
2974 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
2978 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2979 emit_constant_insn (cond
,
2980 gen_rtx_SET (VOIDmode
, sub
,
2981 gen_rtx_NOT (mode
, source
)));
2984 sub
= gen_reg_rtx (mode
);
2985 emit_constant_insn (cond
,
2986 gen_rtx_SET (VOIDmode
, sub
,
2987 gen_rtx_AND (mode
, source
,
2989 emit_constant_insn (cond
,
2990 gen_rtx_SET (VOIDmode
, target
,
2991 gen_rtx_NOT (mode
, sub
)));
2998 /* See if two shifts will do 2 or more insn's worth of work. */
2999 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
3001 HOST_WIDE_INT shift_mask
= ((0xffffffff
3002 << (32 - clear_sign_bit_copies
))
3005 if ((remainder
| shift_mask
) != 0xffffffff)
3009 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3010 insns
= arm_gen_constant (AND
, mode
, cond
,
3011 remainder
| shift_mask
,
3012 new_src
, source
, subtargets
, 1);
3017 rtx targ
= subtargets
? NULL_RTX
: target
;
3018 insns
= arm_gen_constant (AND
, mode
, cond
,
3019 remainder
| shift_mask
,
3020 targ
, source
, subtargets
, 0);
3026 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3027 rtx shift
= GEN_INT (clear_sign_bit_copies
);
3029 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
3030 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
3036 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
3038 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
3040 if ((remainder
| shift_mask
) != 0xffffffff)
3044 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3046 insns
= arm_gen_constant (AND
, mode
, cond
,
3047 remainder
| shift_mask
,
3048 new_src
, source
, subtargets
, 1);
3053 rtx targ
= subtargets
? NULL_RTX
: target
;
3055 insns
= arm_gen_constant (AND
, mode
, cond
,
3056 remainder
| shift_mask
,
3057 targ
, source
, subtargets
, 0);
3063 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
3064 rtx shift
= GEN_INT (clear_zero_bit_copies
);
3066 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
3067 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
3079 for (i
= 0; i
< 32; i
++)
3080 if (remainder
& (1 << i
))
3084 || (code
!= IOR
&& can_invert
&& num_bits_set
> 16))
3085 remainder
^= 0xffffffff;
3086 else if (code
== PLUS
&& num_bits_set
> 16)
3087 remainder
= (-remainder
) & 0xffffffff;
3089 /* For XOR, if more than half the bits are set and there's a sequence
3090 of more than 8 consecutive ones in the pattern then we can XOR by the
3091 inverted constant and then invert the final result; this may save an
3092 instruction and might also lead to the final mvn being merged with
3093 some other operation. */
3094 else if (code
== XOR
&& num_bits_set
> 16
3095 && (count_insns_for_constant (remainder
^ 0xffffffff,
3097 (remainder
^ 0xffffffff))
3098 < count_insns_for_constant (remainder
,
3099 find_best_start (remainder
))))
3101 remainder
^= 0xffffffff;
3110 /* Now try and find a way of doing the job in either two or three
3112 We start by looking for the largest block of zeros that are aligned on
3113 a 2-bit boundary, we then fill up the temps, wrapping around to the
3114 top of the word when we drop off the bottom.
3115 In the worst case this code should produce no more than four insns.
3116 Thumb-2 constants are shifted, not rotated, so the MSB is always the
3117 best place to start. */
3119 /* ??? Use thumb2 replicated constants when the high and low halfwords are
3122 /* Now start emitting the insns. */
3123 i
= find_best_start (remainder
);
3130 if (remainder
& (3 << (i
- 2)))
3135 temp1
= remainder
& ((0x0ff << end
)
3136 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
3137 remainder
&= ~temp1
;
3141 rtx new_src
, temp1_rtx
;
3143 if (code
== SET
|| code
== MINUS
)
3145 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
3146 if (can_invert
&& code
!= MINUS
)
3151 if ((final_invert
|| remainder
) && subtargets
)
3152 new_src
= gen_reg_rtx (mode
);
3157 else if (can_negate
)
3161 temp1
= trunc_int_for_mode (temp1
, mode
);
3162 temp1_rtx
= GEN_INT (temp1
);
3166 else if (code
== MINUS
)
3167 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
3169 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
3171 emit_constant_insn (cond
,
3172 gen_rtx_SET (VOIDmode
, new_src
,
3182 else if (code
== MINUS
)
3188 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
3198 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
3199 gen_rtx_NOT (mode
, source
)));
/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

enum rtx_code
arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
			     rtx * op1)
{
  unsigned HOST_WIDE_INT i = INTVAL (*op1);
  unsigned HOST_WIDE_INT maxval;
  maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;

  switch (code)
    {
    case EQ:
    case NE:
      return code;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  return code == GT ? GE : LT;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  return code == GE ? GT : LE;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (i + 1);
	  return code == GTU ? GEU : LTU;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  return code == GEU ? GTU : LEU;
	}
      break;

    default:
      gcc_unreachable ();
    }

  return code;
}
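/* Example: (gt reg 0xffffff) cannot use 0xffffff as an ARM immediate, but
   0x1000000 is encodable, so the comparison is rewritten as
   (ge reg 0x1000000), which still needs only a single CMP.  */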
/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  return LIBCALL_VALUE (mode);
}
static int
libcall_eq (const void *p1, const void *p2)
{
  return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
}

static hashval_t
libcall_hash (const void *p1)
{
  return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
}

static void
add_libcall (htab_t htab, rtx libcall)
{
  *htab_find_slot (htab, libcall, INSERT) = libcall;
}
3325 arm_libcall_uses_aapcs_base (const_rtx libcall
)
3327 static bool init_done
= false;
3328 static htab_t libcall_htab
;
3334 libcall_htab
= htab_create (31, libcall_hash
, libcall_eq
,
3336 add_libcall (libcall_htab
,
3337 convert_optab_libfunc (sfloat_optab
, SFmode
, SImode
));
3338 add_libcall (libcall_htab
,
3339 convert_optab_libfunc (sfloat_optab
, DFmode
, SImode
));
3340 add_libcall (libcall_htab
,
3341 convert_optab_libfunc (sfloat_optab
, SFmode
, DImode
));
3342 add_libcall (libcall_htab
,
3343 convert_optab_libfunc (sfloat_optab
, DFmode
, DImode
));
3345 add_libcall (libcall_htab
,
3346 convert_optab_libfunc (ufloat_optab
, SFmode
, SImode
));
3347 add_libcall (libcall_htab
,
3348 convert_optab_libfunc (ufloat_optab
, DFmode
, SImode
));
3349 add_libcall (libcall_htab
,
3350 convert_optab_libfunc (ufloat_optab
, SFmode
, DImode
));
3351 add_libcall (libcall_htab
,
3352 convert_optab_libfunc (ufloat_optab
, DFmode
, DImode
));
3354 add_libcall (libcall_htab
,
3355 convert_optab_libfunc (sext_optab
, SFmode
, HFmode
));
3356 add_libcall (libcall_htab
,
3357 convert_optab_libfunc (trunc_optab
, HFmode
, SFmode
));
3358 add_libcall (libcall_htab
,
3359 convert_optab_libfunc (sfix_optab
, DImode
, DFmode
));
3360 add_libcall (libcall_htab
,
3361 convert_optab_libfunc (ufix_optab
, DImode
, DFmode
));
3362 add_libcall (libcall_htab
,
3363 convert_optab_libfunc (sfix_optab
, DImode
, SFmode
));
3364 add_libcall (libcall_htab
,
3365 convert_optab_libfunc (ufix_optab
, DImode
, SFmode
));
3368 return libcall
&& htab_find (libcall_htab
, libcall
) != NULL
;
rtx
arm_libcall_value (enum machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));
    }

  return LIBCALL_VALUE (mode);
}
3387 /* Determine the amount of memory needed to store the possible return
3388 registers of an untyped call. */
3390 arm_apply_result_size (void)
3396 if (TARGET_HARD_FLOAT_ABI
)
3402 if (TARGET_MAVERICK
)
3405 if (TARGET_IWMMXT_ABI
)
3412 /* Decide whether TYPE should be returned in memory (true)
3413 or in a register (false). FNTYPE is the type of the function making
3416 arm_return_in_memory (const_tree type
, const_tree fntype
)
3420 size
= int_size_in_bytes (type
); /* Negative if not fixed size. */
3422 if (TARGET_AAPCS_BASED
)
3424 /* Simple, non-aggregate types (ie not including vectors and
3425 complex) are always returned in a register (or registers).
3426 We don't care about which register here, so we can short-cut
3427 some of the detail. */
3428 if (!AGGREGATE_TYPE_P (type
)
3429 && TREE_CODE (type
) != VECTOR_TYPE
3430 && TREE_CODE (type
) != COMPLEX_TYPE
)
3433 /* Any return value that is no larger than one word can be
3435 if (((unsigned HOST_WIDE_INT
) size
) <= UNITS_PER_WORD
)
3438 /* Check any available co-processors to see if they accept the
3439 type as a register candidate (VFP, for example, can return
3440 some aggregates in consecutive registers). These aren't
3441 available if the call is variadic. */
3442 if (aapcs_select_return_coproc (type
, fntype
) >= 0)
3445 /* Vector values should be returned using ARM registers, not
3446 memory (unless they're over 16 bytes, which will break since
3447 we only have four call-clobbered registers to play with). */
3448 if (TREE_CODE (type
) == VECTOR_TYPE
)
3449 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3451 /* The rest go in memory. */
3455 if (TREE_CODE (type
) == VECTOR_TYPE
)
3456 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
3458 if (!AGGREGATE_TYPE_P (type
) &&
3459 (TREE_CODE (type
) != VECTOR_TYPE
))
3460 /* All simple types are returned in registers. */
3463 if (arm_abi
!= ARM_ABI_APCS
)
3465 /* ATPCS and later return aggregate types in memory only if they are
3466 larger than a word (or are variable size). */
3467 return (size
< 0 || size
> UNITS_PER_WORD
);
3470 /* For the arm-wince targets we choose to be compatible with Microsoft's
3471 ARM and Thumb compilers, which always return aggregates in memory. */
3473 /* All structures/unions bigger than one word are returned in memory.
3474 Also catch the case where int_size_in_bytes returns -1. In this case
3475 the aggregate is either huge or of variable size, and in either case
3476 we will want to return it via memory and not in a register. */
3477 if (size
< 0 || size
> UNITS_PER_WORD
)
3480 if (TREE_CODE (type
) == RECORD_TYPE
)
3484 /* For a struct the APCS says that we only return in a register
3485 if the type is 'integer like' and every addressable element
3486 has an offset of zero. For practical purposes this means
3487 that the structure can have at most one non bit-field element
3488 and that this element must be the first one in the structure. */
3490 /* Find the first field, ignoring non FIELD_DECL things which will
3491 have been created by C++. */
3492 for (field
= TYPE_FIELDS (type
);
3493 field
&& TREE_CODE (field
) != FIELD_DECL
;
3494 field
= TREE_CHAIN (field
))
3498 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3500 /* Check that the first field is valid for returning in a register. */
3502 /* ... Floats are not allowed */
3503 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3506 /* ... Aggregates that are not themselves valid for returning in
3507 a register are not allowed. */
3508 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3511 /* Now check the remaining fields, if any. Only bitfields are allowed,
3512 since they are not addressable. */
3513 for (field
= TREE_CHAIN (field
);
3515 field
= TREE_CHAIN (field
))
3517 if (TREE_CODE (field
) != FIELD_DECL
)
3520 if (!DECL_BIT_FIELD_TYPE (field
))
3527 if (TREE_CODE (type
) == UNION_TYPE
)
3531 /* Unions can be returned in registers if every element is
3532 integral, or can be returned in an integer register. */
3533 for (field
= TYPE_FIELDS (type
);
3535 field
= TREE_CHAIN (field
))
3537 if (TREE_CODE (field
) != FIELD_DECL
)
3540 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
3543 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
3549 #endif /* not ARM_WINCE */
3551 /* Return all other types in memory. */
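  /* Illustration of the rules above: a struct such as { int a; } (4 bytes)
     comes back in r0, while { int a[3]; } (12 bytes, larger than a word
     and not a co-processor candidate) is returned via a memory block
     supplied by the caller.  */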
3555 /* Indicate whether or not words of a double are in big-endian order. */
3558 arm_float_words_big_endian (void)
3560 if (TARGET_MAVERICK
)
3563 /* For FPA, float words are always big-endian. For VFP, floats words
3564 follow the memory system mode. */
3572 return (TARGET_BIG_END
? 1 : 0);
3577 const struct pcs_attribute_arg
3581 } pcs_attribute_args
[] =
3583 {"aapcs", ARM_PCS_AAPCS
},
3584 {"aapcs-vfp", ARM_PCS_AAPCS_VFP
},
3586 /* We could recognize these, but changes would be needed elsewhere
3587 * to implement them. */
3588 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT
},
3589 {"atpcs", ARM_PCS_ATPCS
},
3590 {"apcs", ARM_PCS_APCS
},
3592 {NULL
, ARM_PCS_UNKNOWN
}
3596 arm_pcs_from_attribute (tree attr
)
3598 const struct pcs_attribute_arg
*ptr
;
3601 /* Get the value of the argument. */
3602 if (TREE_VALUE (attr
) == NULL_TREE
3603 || TREE_CODE (TREE_VALUE (attr
)) != STRING_CST
)
3604 return ARM_PCS_UNKNOWN
;
3606 arg
= TREE_STRING_POINTER (TREE_VALUE (attr
));
3608 /* Check it against the list of known arguments. */
3609 for (ptr
= pcs_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
3610 if (streq (arg
, ptr
->arg
))
3613 /* An unrecognized interrupt type. */
3614 return ARM_PCS_UNKNOWN
;
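/* The attribute handled here is the user-visible "pcs" attribute, e.g.
   (illustrative):

     double dot (const float *, const float *)
       __attribute__ ((pcs ("aapcs-vfp")));

   which requests the VFP variant of the AAPCS for that declaration
   alone.  */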
3617 /* Get the PCS variant to use for this call. TYPE is the function's type
3618 specification, DECL is the specific declartion. DECL may be null if
3619 the call could be indirect or if this is a library call. */
3621 arm_get_pcs_model (const_tree type
, const_tree decl
)
3623 bool user_convention
= false;
3624 enum arm_pcs user_pcs
= arm_pcs_default
;
3629 attr
= lookup_attribute ("pcs", TYPE_ATTRIBUTES (type
));
3632 user_pcs
= arm_pcs_from_attribute (TREE_VALUE (attr
));
3633 user_convention
= true;
3636 if (TARGET_AAPCS_BASED
)
3638 /* Detect varargs functions. These always use the base rules
3639 (no argument is ever a candidate for a co-processor
3641 bool base_rules
= (TYPE_ARG_TYPES (type
) != 0
3642 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type
)))
3643 != void_type_node
));
3645 if (user_convention
)
3647 if (user_pcs
> ARM_PCS_AAPCS_LOCAL
)
3648 sorry ("Non-AAPCS derived PCS variant");
3649 else if (base_rules
&& user_pcs
!= ARM_PCS_AAPCS
)
3650 error ("Variadic functions must use the base AAPCS variant");
3654 return ARM_PCS_AAPCS
;
3655 else if (user_convention
)
3657 else if (decl
&& flag_unit_at_a_time
)
3659 /* Local functions never leak outside this compilation unit,
3660 so we are free to use whatever conventions are
3662 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
3663 struct cgraph_local_info
*i
= cgraph_local_info (CONST_CAST_TREE(decl
));
3665 return ARM_PCS_AAPCS_LOCAL
;
3668 else if (user_convention
&& user_pcs
!= arm_pcs_default
)
3669 sorry ("PCS variant");
3671 /* For everything else we use the target's default. */
3672 return arm_pcs_default
;
3677 aapcs_vfp_cum_init (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
3678 const_tree fntype ATTRIBUTE_UNUSED
,
3679 rtx libcall ATTRIBUTE_UNUSED
,
3680 const_tree fndecl ATTRIBUTE_UNUSED
)
3682 /* Record the unallocated VFP registers. */
3683 pcum
->aapcs_vfp_regs_free
= (1 << NUM_VFP_ARG_REGS
) - 1;
3684 pcum
->aapcs_vfp_reg_alloc
= 0;
3687 /* Walk down the type tree of TYPE counting consecutive base elements.
3688 If *MODEP is VOIDmode, then set it to the first valid floating point
3689 type. If a non-floating point type is found, or if a floating point
3690 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
3691 otherwise return the count in the sub-tree. */
3693 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
3695 enum machine_mode mode
;
3698 switch (TREE_CODE (type
))
3701 mode
= TYPE_MODE (type
);
3702 if (mode
!= DFmode
&& mode
!= SFmode
)
3705 if (*modep
== VOIDmode
)
3714 mode
= TYPE_MODE (TREE_TYPE (type
));
3715 if (mode
!= DFmode
&& mode
!= SFmode
)
3718 if (*modep
== VOIDmode
)
3727 /* Use V2SImode and V4SImode as representatives of all 64-bit
3728 and 128-bit vector types, whether or not those modes are
3729 supported with the present options. */
3730 size
= int_size_in_bytes (type
);
3743 if (*modep
== VOIDmode
)
3746 /* Vector modes are considered to be opaque: two vectors are
3747 equivalent for the purposes of being homogeneous aggregates
3748 if they are the same size. */
3757 tree index
= TYPE_DOMAIN (type
);
3759 /* Can't handle incomplete types. */
3760 if (!COMPLETE_TYPE_P(type
))
3763 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
3766 || !TYPE_MAX_VALUE (index
)
3767 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
3768 || !TYPE_MIN_VALUE (index
)
3769 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
3773 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
3774 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
3776 /* There must be no padding. */
3777 if (!host_integerp (TYPE_SIZE (type
), 1)
3778 || (tree_low_cst (TYPE_SIZE (type
), 1)
3779 != count
* GET_MODE_BITSIZE (*modep
)))
3791 /* Can't handle incomplete types. */
3792 if (!COMPLETE_TYPE_P(type
))
3795 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3797 if (TREE_CODE (field
) != FIELD_DECL
)
3800 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3806 /* There must be no padding. */
3807 if (!host_integerp (TYPE_SIZE (type
), 1)
3808 || (tree_low_cst (TYPE_SIZE (type
), 1)
3809 != count
* GET_MODE_BITSIZE (*modep
)))
3816 case QUAL_UNION_TYPE
:
3818 /* These aren't very interesting except in a degenerate case. */
3823 /* Can't handle incomplete types. */
3824 if (!COMPLETE_TYPE_P(type
))
3827 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
3829 if (TREE_CODE (field
) != FIELD_DECL
)
3832 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
3835 count
= count
> sub_count
? count
: sub_count
;
3838 /* There must be no padding. */
3839 if (!host_integerp (TYPE_SIZE (type
), 1)
3840 || (tree_low_cst (TYPE_SIZE (type
), 1)
3841 != count
* GET_MODE_BITSIZE (*modep
)))
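/* Example of what the walk above recognizes (illustrative): the type

     struct quaternion { float x, y, z, w; };

   is a homogeneous aggregate of four SFmode members, so *modep becomes
   SFmode with a count of 4, and under the VFP variant of the AAPCS such a
   value can be passed or returned in s0-s3 instead of core registers.  */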
3854 /* Return true if PCS_VARIANT should use VFP registers. */
3856 use_vfp_abi (enum arm_pcs pcs_variant
, bool is_double
)
3858 if (pcs_variant
== ARM_PCS_AAPCS_VFP
)
3861 if (pcs_variant
!= ARM_PCS_AAPCS_LOCAL
)
3864 return (TARGET_32BIT
&& TARGET_VFP
&& TARGET_HARD_FLOAT
&&
3865 (TARGET_VFP_DOUBLE
|| !is_double
));
3869 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant
,
3870 enum machine_mode mode
, const_tree type
,
3871 enum machine_mode
*base_mode
, int *count
)
3873 enum machine_mode new_mode
= VOIDmode
;
3875 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
3876 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
3877 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
3882 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
3885 new_mode
= (mode
== DCmode
? DFmode
: SFmode
);
3887 else if (type
&& (mode
== BLKmode
|| TREE_CODE (type
) == VECTOR_TYPE
))
3889 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
3891 if (ag_count
> 0 && ag_count
<= 4)
3900 if (!use_vfp_abi (pcs_variant
, ARM_NUM_REGS (new_mode
) > 1))
3903 *base_mode
= new_mode
;
3908 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant
,
3909 enum machine_mode mode
, const_tree type
)
3911 int count ATTRIBUTE_UNUSED
;
3912 enum machine_mode ag_mode ATTRIBUTE_UNUSED
;
3914 if (!use_vfp_abi (pcs_variant
, false))
3916 return aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
3921 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3924 if (!use_vfp_abi (pcum
->pcs_variant
, false))
3927 return aapcs_vfp_is_call_or_return_candidate (pcum
->pcs_variant
, mode
, type
,
3928 &pcum
->aapcs_vfp_rmode
,
3929 &pcum
->aapcs_vfp_rcount
);
3933 aapcs_vfp_allocate (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
3934 const_tree type ATTRIBUTE_UNUSED
)
3936 int shift
= GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
) / GET_MODE_SIZE (SFmode
);
3937 unsigned mask
= (1 << (shift
* pcum
->aapcs_vfp_rcount
)) - 1;
3940 for (regno
= 0; regno
< NUM_VFP_ARG_REGS
; regno
+= shift
)
3941 if (((pcum
->aapcs_vfp_regs_free
>> regno
) & mask
) == mask
)
3943 pcum
->aapcs_vfp_reg_alloc
= mask
<< regno
;
3944 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3947 int rcount
= pcum
->aapcs_vfp_rcount
;
3949 enum machine_mode rmode
= pcum
->aapcs_vfp_rmode
;
3953 /* Avoid using unsupported vector modes. */
3954 if (rmode
== V2SImode
)
3956 else if (rmode
== V4SImode
)
3963 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (rcount
));
3964 for (i
= 0; i
< rcount
; i
++)
3966 rtx tmp
= gen_rtx_REG (rmode
,
3967 FIRST_VFP_REGNUM
+ regno
+ i
* rshift
);
3968 tmp
= gen_rtx_EXPR_LIST
3970 GEN_INT (i
* GET_MODE_SIZE (rmode
)));
3971 XVECEXP (par
, 0, i
) = tmp
;
3974 pcum
->aapcs_reg
= par
;
3977 pcum
->aapcs_reg
= gen_rtx_REG (mode
, FIRST_VFP_REGNUM
+ regno
);
3984 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED
,
3985 enum machine_mode mode
,
3986 const_tree type ATTRIBUTE_UNUSED
)
3988 if (!use_vfp_abi (pcs_variant
, false))
3991 if (mode
== BLKmode
|| (mode
== TImode
&& !TARGET_NEON
))
3994 enum machine_mode ag_mode
;
3999 aapcs_vfp_is_call_or_return_candidate (pcs_variant
, mode
, type
,
4004 if (ag_mode
== V2SImode
)
4006 else if (ag_mode
== V4SImode
)
4012 shift
= GET_MODE_SIZE(ag_mode
) / GET_MODE_SIZE(SFmode
);
4013 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
4014 for (i
= 0; i
< count
; i
++)
4016 rtx tmp
= gen_rtx_REG (ag_mode
, FIRST_VFP_REGNUM
+ i
* shift
);
4017 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
4018 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
4019 XVECEXP (par
, 0, i
) = tmp
;
4025 return gen_rtx_REG (mode
, FIRST_VFP_REGNUM
);
4029 aapcs_vfp_advance (CUMULATIVE_ARGS
*pcum ATTRIBUTE_UNUSED
,
4030 enum machine_mode mode ATTRIBUTE_UNUSED
,
4031 const_tree type ATTRIBUTE_UNUSED
)
4033 pcum
->aapcs_vfp_regs_free
&= ~pcum
->aapcs_vfp_reg_alloc
;
4034 pcum
->aapcs_vfp_reg_alloc
= 0;
4038 #define AAPCS_CP(X) \
4040 aapcs_ ## X ## _cum_init, \
4041 aapcs_ ## X ## _is_call_candidate, \
4042 aapcs_ ## X ## _allocate, \
4043 aapcs_ ## X ## _is_return_candidate, \
4044 aapcs_ ## X ## _allocate_return_reg, \
4045 aapcs_ ## X ## _advance \
/* Table of co-processors that can be used to pass arguments in
   registers.  Ideally no argument should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
4056 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
4057 void (*cum_init
) (CUMULATIVE_ARGS
*, const_tree
, rtx
, const_tree
);
4059 /* Return true if an argument of mode MODE (or type TYPE if MODE is
4060 BLKmode) is a candidate for this co-processor's registers; this
4061 function should ignore any position-dependent state in
4062 CUMULATIVE_ARGS and only use call-type dependent information. */
4063 bool (*is_call_candidate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4065 /* Return true if the argument does get a co-processor register; it
4066 should set aapcs_reg to an RTX of the register allocated as is
4067 required for a return from FUNCTION_ARG. */
4068 bool (*allocate
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4070 /* Return true if a result of mode MODE (or type TYPE if MODE is
4071 BLKmode) is can be returned in this co-processor's registers. */
4072 bool (*is_return_candidate
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4074 /* Allocate and return an RTX element to hold the return type of a
4075 call, this routine must not fail and will only be called if
4076 is_return_candidate returned true with the same parameters. */
4077 rtx (*allocate_return_reg
) (enum arm_pcs
, enum machine_mode
, const_tree
);
4079 /* Finish processing this argument and prepare to start processing
4081 void (*advance
) (CUMULATIVE_ARGS
*, enum machine_mode
, const_tree
);
4082 } aapcs_cp_arg_layout
[ARM_NUM_COPROC_SLOTS
] =
4090 aapcs_select_call_coproc (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4095 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4096 if (aapcs_cp_arg_layout
[i
].is_call_candidate (pcum
, mode
, type
))
4103 aapcs_select_return_coproc (const_tree type
, const_tree fntype
)
4105 /* We aren't passed a decl, so we can't check that a call is local.
4106 However, it isn't clear that that would be a win anyway, since it
4107 might limit some tail-calling opportunities. */
4108 enum arm_pcs pcs_variant
;
4112 const_tree fndecl
= NULL_TREE
;
4114 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4117 fntype
= TREE_TYPE (fntype
);
4120 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4123 pcs_variant
= arm_pcs_default
;
4125 if (pcs_variant
!= ARM_PCS_AAPCS
)
4129 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4130 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
,
4139 aapcs_allocate_return_reg (enum machine_mode mode
, const_tree type
,
4142 /* We aren't passed a decl, so we can't check that a call is local.
4143 However, it isn't clear that that would be a win anyway, since it
4144 might limit some tail-calling opportunities. */
4145 enum arm_pcs pcs_variant
;
4146 int unsignedp ATTRIBUTE_UNUSED
;
4150 const_tree fndecl
= NULL_TREE
;
4152 if (TREE_CODE (fntype
) == FUNCTION_DECL
)
4155 fntype
= TREE_TYPE (fntype
);
4158 pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4161 pcs_variant
= arm_pcs_default
;
4163 /* Promote integer types. */
4164 if (type
&& INTEGRAL_TYPE_P (type
))
4165 mode
= arm_promote_function_mode (type
, mode
, &unsignedp
, fntype
, 1);
4167 if (pcs_variant
!= ARM_PCS_AAPCS
)
4171 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4172 if (aapcs_cp_arg_layout
[i
].is_return_candidate (pcs_variant
, mode
,
4174 return aapcs_cp_arg_layout
[i
].allocate_return_reg (pcs_variant
,
4178 /* Promotes small structs returned in a register to full-word size
4179 for big-endian AAPCS. */
4180 if (type
&& arm_return_in_msb (type
))
4182 HOST_WIDE_INT size
= int_size_in_bytes (type
);
4183 if (size
% UNITS_PER_WORD
!= 0)
4185 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
4186 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
4190 return gen_rtx_REG (mode
, R0_REGNUM
);
4194 aapcs_libcall_value (enum machine_mode mode
)
4196 return aapcs_allocate_return_reg (mode
, NULL_TREE
, NULL_TREE
);
4199 /* Lay out a function argument using the AAPCS rules. The rule
4200 numbers referred to here are those in the AAPCS. */
4202 aapcs_layout_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4203 tree type
, int named
)
4208 /* We only need to do this once per argument. */
4209 if (pcum
->aapcs_arg_processed
)
4212 pcum
->aapcs_arg_processed
= true;
4214 /* Special case: if named is false then we are handling an incoming
4215 anonymous argument which is on the stack. */
4219 /* Is this a potential co-processor register candidate? */
4220 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4222 int slot
= aapcs_select_call_coproc (pcum
, mode
, type
);
4223 pcum
->aapcs_cprc_slot
= slot
;
 /* We don't have to apply any of the rules from part B of the
    preparation phase; these are handled elsewhere in the
4231 /* A Co-processor register candidate goes either in its own
4232 class of registers or on the stack. */
4233 if (!pcum
->aapcs_cprc_failed
[slot
])
4235 /* C1.cp - Try to allocate the argument to co-processor
4237 if (aapcs_cp_arg_layout
[slot
].allocate (pcum
, mode
, type
))
4240 /* C2.cp - Put the argument on the stack and note that we
4241 can't assign any more candidates in this slot. We also
4242 need to note that we have allocated stack space, so that
4243 we won't later try to split a non-cprc candidate between
4244 core registers and the stack. */
4245 pcum
->aapcs_cprc_failed
[slot
] = true;
4246 pcum
->can_split
= false;
4249 /* We didn't get a register, so this argument goes on the
4251 gcc_assert (pcum
->can_split
== false);
4256 /* C3 - For double-word aligned arguments, round the NCRN up to the
4257 next even number. */
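  /* For example, a 'double' or 'long long' arriving when ncrn == 1 is
     bumped to ncrn == 2 so that it occupies the even/odd pair r2/r3,
     as rule C.3 requires.  */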
4258 ncrn
= pcum
->aapcs_ncrn
;
4259 if ((ncrn
& 1) && arm_needs_doubleword_align (mode
, type
))
4262 nregs
= ARM_NUM_REGS2(mode
, type
);
  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them a size of zero;
     it then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
4270 gcc_assert (nregs
>= 0);
4272 nregs2
= nregs
? nregs
: 1;
4274 /* C4 - Argument fits entirely in core registers. */
4275 if (ncrn
+ nregs2
<= NUM_ARG_REGS
)
4277 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4278 pcum
->aapcs_next_ncrn
= ncrn
+ nregs
;
4282 /* C5 - Some core registers left and there are no arguments already
4283 on the stack: split this argument between the remaining core
4284 registers and the stack. */
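  /* For example, an 8-byte argument arriving at ncrn == 3 takes r3 for
     its first word and 4 bytes of stack for the second, leaving
     aapcs_partial == 4.  */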
4285 if (ncrn
< NUM_ARG_REGS
&& pcum
->can_split
)
4287 pcum
->aapcs_reg
= gen_rtx_REG (mode
, ncrn
);
4288 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
4289 pcum
->aapcs_partial
= (NUM_ARG_REGS
- ncrn
) * UNITS_PER_WORD
;
4293 /* C6 - NCRN is set to 4. */
4294 pcum
->aapcs_next_ncrn
= NUM_ARG_REGS
;
  /* C7,C8 - argument goes on the stack.  We have nothing to do here.  */
4300 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4301 for a call to a function whose data type is FNTYPE.
4302 For a library call, FNTYPE is NULL. */
4304 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
4306 tree fndecl ATTRIBUTE_UNUSED
)
4308 /* Long call handling. */
4310 pcum
->pcs_variant
= arm_get_pcs_model (fntype
, fndecl
);
4312 pcum
->pcs_variant
= arm_pcs_default
;
4314 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4316 if (arm_libcall_uses_aapcs_base (libname
))
4317 pcum
->pcs_variant
= ARM_PCS_AAPCS
;
4319 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
= 0;
4320 pcum
->aapcs_reg
= NULL_RTX
;
4321 pcum
->aapcs_partial
= 0;
4322 pcum
->aapcs_arg_processed
= false;
4323 pcum
->aapcs_cprc_slot
= -1;
4324 pcum
->can_split
= true;
4326 if (pcum
->pcs_variant
!= ARM_PCS_AAPCS
)
4330 for (i
= 0; i
< ARM_NUM_COPROC_SLOTS
; i
++)
4332 pcum
->aapcs_cprc_failed
[i
] = false;
4333 aapcs_cp_arg_layout
[i
].cum_init (pcum
, fntype
, libname
, fndecl
);
4341 /* On the ARM, the offset starts at 0. */
4343 pcum
->iwmmxt_nregs
= 0;
4344 pcum
->can_split
= true;
4346 /* Varargs vectors are treated the same as long long.
4347 named_count avoids having to change the way arm handles 'named' */
4348 pcum
->named_count
= 0;
4351 if (TARGET_REALLY_IWMMXT
&& fntype
)
4355 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
4357 fn_arg
= TREE_CHAIN (fn_arg
))
4358 pcum
->named_count
+= 1;
4360 if (! pcum
->named_count
)
4361 pcum
->named_count
= INT_MAX
;
/* Return true if mode/type need doubleword alignment.  */

arm_needs_doubleword_align (enum machine_mode mode, tree type)

  return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
          || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
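/* PARM_BOUNDARY is 32 on ARM, so this holds for DImode/DFmode values and
   for any type whose declared alignment exceeds 32 bits.  */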
4375 /* Determine where to put an argument to a function.
4376 Value is zero to push the argument on the stack,
4377 or a hard register in which to store the argument.
4379 MODE is the argument's machine mode.
4380 TYPE is the data type of the argument (as a tree).
4381 This is null for libcalls where that information may
4383 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4384 the preceding args and about the function being called.
4385 NAMED is nonzero if this argument is a named parameter
4386 (otherwise it is an extra parameter matching an ellipsis). */
4389 arm_function_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4390 tree type
, int named
)
4394 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4395 a call insn (op3 of a call_value insn). */
4396 if (mode
== VOIDmode
)
4399 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4401 aapcs_layout_arg (pcum
, mode
, type
, named
);
4402 return pcum
->aapcs_reg
;
4405 /* Varargs vectors are treated the same as long long.
4406 named_count avoids having to change the way arm handles 'named' */
4407 if (TARGET_IWMMXT_ABI
4408 && arm_vector_mode_supported_p (mode
)
4409 && pcum
->named_count
> pcum
->nargs
+ 1)
4411 if (pcum
->iwmmxt_nregs
<= 9)
4412 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
4415 pcum
->can_split
= false;
4420 /* Put doubleword aligned quantities in even register pairs. */
4422 && ARM_DOUBLEWORD_ALIGN
4423 && arm_needs_doubleword_align (mode
, type
))
4426 if (mode
== VOIDmode
)
4427 /* Pick an arbitrary value for operand 2 of the call insn. */
4430 /* Only allow splitting an arg between regs and memory if all preceding
4431 args were allocated to regs. For args passed by reference we only count
4432 the reference pointer. */
4433 if (pcum
->can_split
)
4436 nregs
= ARM_NUM_REGS2 (mode
, type
);
4438 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
4441 return gen_rtx_REG (mode
, pcum
->nregs
);
4445 arm_arg_partial_bytes (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4446 tree type
, bool named
)
4448 int nregs
= pcum
->nregs
;
4450 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4452 aapcs_layout_arg (pcum
, mode
, type
, named
);
4453 return pcum
->aapcs_partial
;
4456 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
4459 if (NUM_ARG_REGS
> nregs
4460 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
4462 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
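      /* For example, with three argument registers already used
         (nregs == 3), a 12-byte structure is reported as having
         (4 - 3) * 4 == 4 bytes passed in registers; the remaining
         8 bytes go on the stack.  */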
4468 arm_function_arg_advance (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
4469 tree type
, bool named
)
4471 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
4473 aapcs_layout_arg (pcum
, mode
, type
, named
);
4475 if (pcum
->aapcs_cprc_slot
>= 0)
4477 aapcs_cp_arg_layout
[pcum
->aapcs_cprc_slot
].advance (pcum
, mode
,
4479 pcum
->aapcs_cprc_slot
= -1;
4482 /* Generic stuff. */
4483 pcum
->aapcs_arg_processed
= false;
4484 pcum
->aapcs_ncrn
= pcum
->aapcs_next_ncrn
;
4485 pcum
->aapcs_reg
= NULL_RTX
;
4486 pcum
->aapcs_partial
= 0;
4491 if (arm_vector_mode_supported_p (mode
)
4492 && pcum
->named_count
> pcum
->nargs
4493 && TARGET_IWMMXT_ABI
)
4494 pcum
->iwmmxt_nregs
+= 1;
4496 pcum
->nregs
+= ARM_NUM_REGS2 (mode
, type
);
/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
                       enum machine_mode mode ATTRIBUTE_UNUSED,
                       const_tree type, bool named ATTRIBUTE_UNUSED)

  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
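/* For example, an argument whose type contains a variable-length array
   member (a GNU extension) has a non-constant TYPE_SIZE and so is passed
   by invisible reference.  */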
/* Encode the current state of the #pragma [no_]long_calls.  */

  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */

static arm_pragma_enum arm_pragma_long_calls = OFF;

arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)

  arm_pragma_long_calls = LONG;

arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)

  arm_pragma_long_calls = SHORT;

arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)

  arm_pragma_long_calls = OFF;
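/* Illustrative user-level usage of the pragmas handled above:

     #pragma long_calls
     void far_away (void);     -- calls to far_away use a long-call sequence
     #pragma no_long_calls
     void nearby (void);       -- calls to nearby use a plain BL
     #pragma long_calls_off    -- back to the command-line default
*/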
4539 /* Handle an attribute requiring a FUNCTION_DECL;
4540 arguments as in struct attribute_spec.handler. */
4542 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
4543 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4545 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4547 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4549 *no_add_attrs
= true;
4555 /* Handle an "interrupt" or "isr" attribute;
4556 arguments as in struct attribute_spec.handler. */
4558 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
4563 if (TREE_CODE (*node
) != FUNCTION_DECL
)
4565 warning (OPT_Wattributes
, "%qE attribute only applies to functions",
4567 *no_add_attrs
= true;
4569 /* FIXME: the argument if any is checked for type attributes;
4570 should it be checked for decl ones? */
4574 if (TREE_CODE (*node
) == FUNCTION_TYPE
4575 || TREE_CODE (*node
) == METHOD_TYPE
)
4577 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
4579 warning (OPT_Wattributes
, "%qE attribute ignored",
4581 *no_add_attrs
= true;
4584 else if (TREE_CODE (*node
) == POINTER_TYPE
4585 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
4586 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
4587 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
4589 *node
= build_variant_type_copy (*node
);
4590 TREE_TYPE (*node
) = build_type_attribute_variant
4592 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
4593 *no_add_attrs
= true;
4597 /* Possibly pass this attribute on from the type to a decl. */
4598 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
4599 | (int) ATTR_FLAG_FUNCTION_NEXT
4600 | (int) ATTR_FLAG_ARRAY_NEXT
))
4602 *no_add_attrs
= true;
4603 return tree_cons (name
, args
, NULL_TREE
);
4607 warning (OPT_Wattributes
, "%qE attribute ignored",
4616 /* Handle a "pcs" attribute; arguments as in struct
4617 attribute_spec.handler. */
4619 arm_handle_pcs_attribute (tree
*node ATTRIBUTE_UNUSED
, tree name
, tree args
,
4620 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
4622 if (arm_pcs_from_attribute (args
) == ARM_PCS_UNKNOWN
)
4624 warning (OPT_Wattributes
, "%qE attribute ignored", name
);
4625 *no_add_attrs
= true;
4630 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
4631 /* Handle the "notshared" attribute. This attribute is another way of
4632 requesting hidden visibility. ARM's compiler supports
4633 "__declspec(notshared)"; we support the same thing via an
4637 arm_handle_notshared_attribute (tree
*node
,
4638 tree name ATTRIBUTE_UNUSED
,
4639 tree args ATTRIBUTE_UNUSED
,
4640 int flags ATTRIBUTE_UNUSED
,
4643 tree decl
= TYPE_NAME (*node
);
4647 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
4648 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
4649 *no_add_attrs
= false;
4655 /* Return 0 if the attributes for two types are incompatible, 1 if they
4656 are compatible, and 2 if they are nearly compatible (which causes a
4657 warning to be generated). */
4659 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
4663 /* Check for mismatch of non-default calling convention. */
4664 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
4667 /* Check for mismatched call attributes. */
4668 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4669 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4670 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
4671 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
4673 /* Only bother to check if an attribute is defined. */
4674 if (l1
| l2
| s1
| s2
)
4676 /* If one type has an attribute, the other must have the same attribute. */
4677 if ((l1
!= l2
) || (s1
!= s2
))
4680 /* Disallow mixed attributes. */
4681 if ((l1
& s2
) || (l2
& s1
))
4685 /* Check for mismatched ISR attribute. */
4686 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
4688 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
4689 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
4691 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
4698 /* Assigns default attributes to newly defined type. This is used to
4699 set short_call/long_call attributes for function types of
4700 functions defined inside corresponding #pragma scopes. */
4702 arm_set_default_type_attributes (tree type
)
4704 /* Add __attribute__ ((long_call)) to all functions, when
4705 inside #pragma long_calls or __attribute__ ((short_call)),
4706 when inside #pragma no_long_calls. */
4707 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
4709 tree type_attr_list
, attr_name
;
4710 type_attr_list
= TYPE_ATTRIBUTES (type
);
4712 if (arm_pragma_long_calls
== LONG
)
4713 attr_name
= get_identifier ("long_call");
4714 else if (arm_pragma_long_calls
== SHORT
)
4715 attr_name
= get_identifier ("short_call");
4719 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
4720 TYPE_ATTRIBUTES (type
) = type_attr_list
;
4724 /* Return true if DECL is known to be linked into section SECTION. */
4727 arm_function_in_section_p (tree decl
, section
*section
)
4729 /* We can only be certain about functions defined in the same
4730 compilation unit. */
4731 if (!TREE_STATIC (decl
))
4734 /* Make sure that SYMBOL always binds to the definition in this
4735 compilation unit. */
4736 if (!targetm
.binds_local_p (decl
))
4739 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
4740 if (!DECL_SECTION_NAME (decl
))
4742 /* Make sure that we will not create a unique section for DECL. */
4743 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
4747 return function_section (decl
) == section
;
/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

	a.  has an __attribute__ ((long_call))
	or b.  is within the scope of a #pragma long_calls
	or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

	d.  has an __attribute__ ((short_call))
	or e.  is inside the scope of a #pragma no_long_calls
	or f.  is defined in the same section as the current function.  */
4765 arm_is_long_call_p (tree decl
)
4770 return TARGET_LONG_CALLS
;
4772 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
4773 if (lookup_attribute ("short_call", attrs
))
4776 /* For "f", be conservative, and only cater for cases in which the
4777 whole of the current function is placed in the same section. */
4778 if (!flag_reorder_blocks_and_partition
4779 && TREE_CODE (decl
) == FUNCTION_DECL
4780 && arm_function_in_section_p (decl
, current_function_section ()))
4783 if (lookup_attribute ("long_call", attrs
))
4786 return TARGET_LONG_CALLS
;
4789 /* Return nonzero if it is ok to make a tail-call to DECL. */
4791 arm_function_ok_for_sibcall (tree decl
, tree exp
)
4793 unsigned long func_type
;
4795 if (cfun
->machine
->sibcall_blocked
)
4798 /* Never tailcall something for which we have no decl, or if we
4799 are in Thumb mode. */
4800 if (decl
== NULL
|| TARGET_THUMB
)
4803 /* The PIC register is live on entry to VxWorks PLT entries, so we
4804 must make the call before restoring the PIC register. */
4805 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
4808 /* Cannot tail-call to long calls, since these are out of range of
4809 a branch instruction. */
4810 if (arm_is_long_call_p (decl
))
4813 /* If we are interworking and the function is not declared static
4814 then we can't tail-call it unless we know that it exists in this
4815 compilation unit (since it might be a Thumb routine). */
4816 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
4819 func_type
= arm_current_func_type ();
4820 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
4821 if (IS_INTERRUPT (func_type
))
4824 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun
->decl
))))
4826 /* Check that the return value locations are the same. For
4827 example that we aren't returning a value from the sibling in
4828 a VFP register but then need to transfer it to a core
4832 a
= arm_function_value (TREE_TYPE (exp
), decl
, false);
4833 b
= arm_function_value (TREE_TYPE (DECL_RESULT (cfun
->decl
)),
4835 if (!rtx_equal_p (a
, b
))
4839 /* Never tailcall if function may be called with a misaligned SP. */
4840 if (IS_STACKALIGN (func_type
))
4843 /* Everything else is ok. */
4848 /* Addressing mode support functions. */
4850 /* Return nonzero if X is a legitimate immediate operand when compiling
4851 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
4853 legitimate_pic_operand_p (rtx x
)
4855 if (GET_CODE (x
) == SYMBOL_REF
4856 || (GET_CODE (x
) == CONST
4857 && GET_CODE (XEXP (x
, 0)) == PLUS
4858 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
4864 /* Record that the current function needs a PIC register. Initialize
4865 cfun->machine->pic_reg if we have not already done so. */
4868 require_pic_register (void)
4870 /* A lot of the logic here is made obscure by the fact that this
4871 routine gets called as part of the rtx cost estimation process.
4872 We don't want those calls to affect any assumptions about the real
4873 function; and further, we can't call entry_of_function() until we
4874 start the real expansion process. */
4875 if (!crtl
->uses_pic_offset_table
)
4877 gcc_assert (can_create_pseudo_p ());
4878 if (arm_pic_register
!= INVALID_REGNUM
)
4880 if (!cfun
->machine
->pic_reg
)
4881 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
4883 /* Play games to avoid marking the function as needing pic
4884 if we are being called as part of the cost-estimation
4886 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4887 crtl
->uses_pic_offset_table
= 1;
4893 if (!cfun
->machine
->pic_reg
)
4894 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
4896 /* Play games to avoid marking the function as needing pic
4897 if we are being called as part of the cost-estimation
4899 if (current_ir_type () != IR_GIMPLE
|| currently_expanding_to_rtl
)
4901 crtl
->uses_pic_offset_table
= 1;
4904 arm_load_pic_register (0UL);
4908 /* We can be called during expansion of PHI nodes, where
4909 we can't yet emit instructions directly in the final
4910 insn stream. Queue the insns on the entry edge, they will
4911 be committed after everything else is expanded. */
4912 insert_insn_on_edge (seq
, single_succ_edge (ENTRY_BLOCK_PTR
));
4919 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
4921 if (GET_CODE (orig
) == SYMBOL_REF
4922 || GET_CODE (orig
) == LABEL_REF
)
4924 rtx pic_ref
, address
;
4929 gcc_assert (can_create_pseudo_p ());
4930 reg
= gen_reg_rtx (Pmode
);
4931 address
= gen_reg_rtx (Pmode
);
4936 /* VxWorks does not impose a fixed gap between segments; the run-time
4937 gap can be different from the object-file gap. We therefore can't
4938 use GOTOFF unless we are absolutely sure that the symbol is in the
4939 same segment as the GOT. Unfortunately, the flexibility of linker
4940 scripts means that we can't be sure of that in general, so assume
4941 that GOTOFF is never valid on VxWorks. */
4942 if ((GET_CODE (orig
) == LABEL_REF
4943 || (GET_CODE (orig
) == SYMBOL_REF
&&
4944 SYMBOL_REF_LOCAL_P (orig
)))
4946 && !TARGET_VXWORKS_RTP
)
4947 insn
= arm_pic_static_addr (orig
, reg
);
4950 /* If this function doesn't have a pic register, create one now. */
4951 require_pic_register ();
4954 emit_insn (gen_pic_load_addr_32bit (address
, orig
));
4955 else /* TARGET_THUMB1 */
4956 emit_insn (gen_pic_load_addr_thumb1 (address
, orig
));
4958 pic_ref
= gen_const_mem (Pmode
,
4959 gen_rtx_PLUS (Pmode
, cfun
->machine
->pic_reg
,
4961 insn
= emit_move_insn (reg
, pic_ref
);
4964 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4966 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
4970 else if (GET_CODE (orig
) == CONST
)
4974 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4975 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
4978 /* Handle the case where we have: const (UNSPEC_TLS). */
4979 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
4980 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
4983 /* Handle the case where we have:
4984 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
4986 if (GET_CODE (XEXP (orig
, 0)) == PLUS
4987 && GET_CODE (XEXP (XEXP (orig
, 0), 0)) == UNSPEC
4988 && XINT (XEXP (XEXP (orig
, 0), 0), 1) == UNSPEC_TLS
)
4990 gcc_assert (GET_CODE (XEXP (XEXP (orig
, 0), 1)) == CONST_INT
);
4996 gcc_assert (can_create_pseudo_p ());
4997 reg
= gen_reg_rtx (Pmode
);
5000 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
5002 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
5003 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
5004 base
== reg
? 0 : reg
);
5006 if (GET_CODE (offset
) == CONST_INT
)
5008 /* The base register doesn't really matter, we only want to
5009 test the index for the appropriate mode. */
5010 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
5012 gcc_assert (can_create_pseudo_p ());
5013 offset
= force_reg (Pmode
, offset
);
5016 if (GET_CODE (offset
) == CONST_INT
)
5017 return plus_constant (base
, INTVAL (offset
));
5020 if (GET_MODE_SIZE (mode
) > 4
5021 && (GET_MODE_CLASS (mode
) == MODE_INT
5022 || TARGET_SOFT_FLOAT
))
5024 emit_insn (gen_addsi3 (reg
, base
, offset
));
5028 return gen_rtx_PLUS (Pmode
, base
, offset
);
5035 /* Find a spare register to use during the prolog of a function. */
5038 thumb_find_work_register (unsigned long pushed_regs_mask
)
5042 /* Check the argument registers first as these are call-used. The
5043 register allocation order means that sometimes r3 might be used
5044 but earlier argument registers might not, so check them all. */
5045 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
5046 if (!df_regs_ever_live_p (reg
))
5049 /* Before going on to check the call-saved registers we can try a couple
5050 more ways of deducing that r3 is available. The first is when we are
5051 pushing anonymous arguments onto the stack and we have less than 4
5052 registers worth of fixed arguments(*). In this case r3 will be part of
5053 the variable argument list and so we can be sure that it will be
5054 pushed right at the start of the function. Hence it will be available
5055 for the rest of the prologue.
5056 (*): ie crtl->args.pretend_args_size is greater than 0. */
5057 if (cfun
->machine
->uses_anonymous_args
5058 && crtl
->args
.pretend_args_size
> 0)
5059 return LAST_ARG_REGNUM
;
5061 /* The other case is when we have fixed arguments but less than 4 registers
5062 worth. In this case r3 might be used in the body of the function, but
5063 it is not being used to convey an argument into the function. In theory
5064 we could just check crtl->args.size to see how many bytes are
5065 being passed in argument registers, but it seems that it is unreliable.
5066 Sometimes it will have the value 0 when in fact arguments are being
5067 passed. (See testcase execute/20021111-1.c for an example). So we also
5068 check the args_info.nregs field as well. The problem with this field is
5069 that it makes no allowances for arguments that are passed to the
5070 function but which are not used. Hence we could miss an opportunity
5071 when a function has an unused argument in r3. But it is better to be
5072 safe than to be sorry. */
5073 if (! cfun
->machine
->uses_anonymous_args
5074 && crtl
->args
.size
>= 0
5075 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
5076 && crtl
->args
.info
.nregs
< 4)
5077 return LAST_ARG_REGNUM
;
5079 /* Otherwise look for a call-saved register that is going to be pushed. */
5080 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
5081 if (pushed_regs_mask
& (1 << reg
))
5086 /* Thumb-2 can use high regs. */
5087 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
5088 if (pushed_regs_mask
& (1 << reg
))
5091 /* Something went wrong - thumb_compute_save_reg_mask()
5092 should have arranged for a suitable register to be pushed. */
5096 static GTY(()) int pic_labelno
;
5098 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
5102 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
5104 rtx l1
, labelno
, pic_tmp
, pic_rtx
, pic_reg
;
5106 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
5109 gcc_assert (flag_pic
);
5111 pic_reg
= cfun
->machine
->pic_reg
;
5112 if (TARGET_VXWORKS_RTP
)
5114 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
5115 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5116 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5118 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
5120 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
5121 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
5125 /* We use an UNSPEC rather than a LABEL_REF because this label
5126 never appears in the code stream. */
5128 labelno
= GEN_INT (pic_labelno
++);
5129 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5130 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5132 /* On the ARM the PC register contains 'dot + 8' at the time of the
5133 addition, on the Thumb it is 'dot + 4'. */
5134 pic_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5135 pic_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, pic_rtx
),
5137 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
5141 emit_insn (gen_pic_load_addr_32bit (pic_reg
, pic_rtx
));
5143 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
5145 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5147 else /* TARGET_THUMB1 */
5149 if (arm_pic_register
!= INVALID_REGNUM
5150 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
5152 /* We will have pushed the pic register, so we should always be
5153 able to find a work register. */
5154 pic_tmp
= gen_rtx_REG (SImode
,
5155 thumb_find_work_register (saved_regs
));
5156 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
5157 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
5160 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
5161 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
5165 /* Need to emit this whether or not we obey regdecls,
5166 since setjmp/longjmp can cause life info to screw up. */
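  /* For reference, the ARM-state code emitted above is essentially the
     classic PIC-base sequence (illustrative register and label names):

	 ldr	rPIC, .LCP	@ .LCP: .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+8)
     .LPIC0:
	 add	rPIC, pc, rPIC

     which works because the PC reads as '.LPIC0 + 8' in ARM state
     (and '+ 4' in Thumb state).  */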
5170 /* Generate code to load the address of a static var when flag_pic is set. */
5172 arm_pic_static_addr (rtx orig
, rtx reg
)
5174 rtx l1
, labelno
, offset_rtx
, insn
;
5176 gcc_assert (flag_pic
);
5178 /* We use an UNSPEC rather than a LABEL_REF because this label
5179 never appears in the code stream. */
5180 labelno
= GEN_INT (pic_labelno
++);
5181 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5182 l1
= gen_rtx_CONST (VOIDmode
, l1
);
5184 /* On the ARM the PC register contains 'dot + 8' at the time of the
5185 addition, on the Thumb it is 'dot + 4'. */
5186 offset_rtx
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
5187 offset_rtx
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, orig
, offset_rtx
),
5188 UNSPEC_SYMBOL_OFFSET
);
5189 offset_rtx
= gen_rtx_CONST (Pmode
, offset_rtx
);
5193 emit_insn (gen_pic_load_addr_32bit (reg
, offset_rtx
));
5195 insn
= emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5197 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5199 else /* TARGET_THUMB1 */
5201 emit_insn (gen_pic_load_addr_thumb1 (reg
, offset_rtx
));
5202 insn
= emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5208 /* Return nonzero if X is valid as an ARM state addressing register. */
5210 arm_address_register_rtx_p (rtx x
, int strict_p
)
5214 if (GET_CODE (x
) != REG
)
5220 return ARM_REGNO_OK_FOR_BASE_P (regno
);
5222 return (regno
<= LAST_ARM_REGNUM
5223 || regno
>= FIRST_PSEUDO_REGISTER
5224 || regno
== FRAME_POINTER_REGNUM
5225 || regno
== ARG_POINTER_REGNUM
);
5228 /* Return TRUE if this rtx is the difference of a symbol and a label,
5229 and will reduce to a PC-relative relocation in the object file.
5230 Expressions like this can be left alone when generating PIC, rather
5231 than forced through the GOT. */
5233 pcrel_constant_p (rtx x
)
5235 if (GET_CODE (x
) == MINUS
)
5236 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
5241 /* Return nonzero if X is a valid ARM state address operand. */
5243 arm_legitimate_address_outer_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
5247 enum rtx_code code
= GET_CODE (x
);
5249 if (arm_address_register_rtx_p (x
, strict_p
))
5252 use_ldrd
= (TARGET_LDRD
5254 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5256 if (code
== POST_INC
|| code
== PRE_DEC
5257 || ((code
== PRE_INC
|| code
== POST_DEC
)
5258 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5259 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5261 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5262 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5263 && GET_CODE (XEXP (x
, 1)) == PLUS
5264 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5266 rtx addend
= XEXP (XEXP (x
, 1), 1);
5268 /* Don't allow ldrd post increment by register because it's hard
5269 to fixup invalid register choices. */
5271 && GET_CODE (x
) == POST_MODIFY
5272 && GET_CODE (addend
) == REG
)
5275 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
5276 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
5279 /* After reload constants split into minipools will have addresses
5280 from a LABEL_REF. */
5281 else if (reload_completed
5282 && (code
== LABEL_REF
5284 && GET_CODE (XEXP (x
, 0)) == PLUS
5285 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5286 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5289 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5292 else if (code
== PLUS
)
5294 rtx xop0
= XEXP (x
, 0);
5295 rtx xop1
= XEXP (x
, 1);
5297 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5298 && GET_CODE(xop1
) == CONST_INT
5299 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
5300 || (arm_address_register_rtx_p (xop1
, strict_p
)
5301 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
5305 /* Reload currently can't handle MINUS, so disable this for now */
5306 else if (GET_CODE (x
) == MINUS
)
5308 rtx xop0
= XEXP (x
, 0);
5309 rtx xop1
= XEXP (x
, 1);
5311 return (arm_address_register_rtx_p (xop0
, strict_p
)
5312 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
5316 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5317 && code
== SYMBOL_REF
5318 && CONSTANT_POOL_ADDRESS_P (x
)
5320 && symbol_mentioned_p (get_pool_constant (x
))
5321 && ! pcrel_constant_p (get_pool_constant (x
))))
5327 /* Return nonzero if X is a valid Thumb-2 address operand. */
5329 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5332 enum rtx_code code
= GET_CODE (x
);
5334 if (arm_address_register_rtx_p (x
, strict_p
))
5337 use_ldrd
= (TARGET_LDRD
5339 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
5341 if (code
== POST_INC
|| code
== PRE_DEC
5342 || ((code
== PRE_INC
|| code
== POST_DEC
)
5343 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
5344 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
5346 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
5347 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
5348 && GET_CODE (XEXP (x
, 1)) == PLUS
5349 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
5351 /* Thumb-2 only has autoincrement by constant. */
5352 rtx addend
= XEXP (XEXP (x
, 1), 1);
5353 HOST_WIDE_INT offset
;
5355 if (GET_CODE (addend
) != CONST_INT
)
5358 offset
= INTVAL(addend
);
5359 if (GET_MODE_SIZE (mode
) <= 4)
5360 return (offset
> -256 && offset
< 256);
5362 return (use_ldrd
&& offset
> -1024 && offset
< 1024
5363 && (offset
& 3) == 0);
5366 /* After reload constants split into minipools will have addresses
5367 from a LABEL_REF. */
5368 else if (reload_completed
5369 && (code
== LABEL_REF
5371 && GET_CODE (XEXP (x
, 0)) == PLUS
5372 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5373 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5376 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
5379 else if (code
== PLUS
)
5381 rtx xop0
= XEXP (x
, 0);
5382 rtx xop1
= XEXP (x
, 1);
5384 return ((arm_address_register_rtx_p (xop0
, strict_p
)
5385 && thumb2_legitimate_index_p (mode
, xop1
, strict_p
))
5386 || (arm_address_register_rtx_p (xop1
, strict_p
)
5387 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
5390 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5391 && code
== SYMBOL_REF
5392 && CONSTANT_POOL_ADDRESS_P (x
)
5394 && symbol_mentioned_p (get_pool_constant (x
))
5395 && ! pcrel_constant_p (get_pool_constant (x
))))
5401 /* Return nonzero if INDEX is valid for an address index operand in
5404 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
5407 HOST_WIDE_INT range
;
5408 enum rtx_code code
= GET_CODE (index
);
5410 /* Standard coprocessor addressing modes. */
5411 if (TARGET_HARD_FLOAT
5412 && (TARGET_FPA
|| TARGET_MAVERICK
)
5413 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
5414 || (TARGET_MAVERICK
&& mode
== DImode
)))
5415 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5416 && INTVAL (index
) > -1024
5417 && (INTVAL (index
) & 3) == 0);
5420 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
5421 return (code
== CONST_INT
5422 && INTVAL (index
) < 1016
5423 && INTVAL (index
) > -1024
5424 && (INTVAL (index
) & 3) == 0);
5426 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5427 return (code
== CONST_INT
5428 && INTVAL (index
) < 1024
5429 && INTVAL (index
) > -1024
5430 && (INTVAL (index
) & 3) == 0);
5432 if (arm_address_register_rtx_p (index
, strict_p
)
5433 && (GET_MODE_SIZE (mode
) <= 4))
5436 if (mode
== DImode
|| mode
== DFmode
)
5438 if (code
== CONST_INT
)
5440 HOST_WIDE_INT val
= INTVAL (index
);
5443 return val
> -256 && val
< 256;
5445 return val
> -4096 && val
< 4092;
5448 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
5451 if (GET_MODE_SIZE (mode
) <= 4
5455 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
5459 rtx xiop0
= XEXP (index
, 0);
5460 rtx xiop1
= XEXP (index
, 1);
5462 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5463 && power_of_two_operand (xiop1
, SImode
))
5464 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5465 && power_of_two_operand (xiop0
, SImode
)));
5467 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
5468 || code
== ASHIFT
|| code
== ROTATERT
)
5470 rtx op
= XEXP (index
, 1);
5472 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5473 && GET_CODE (op
) == CONST_INT
5475 && INTVAL (op
) <= 31);
5479 /* For ARM v4 we may be doing a sign-extend operation during the
5485 || (outer
== SIGN_EXTEND
&& mode
== QImode
))
5491 range
= (mode
== HImode
|| mode
== HFmode
) ? 4095 : 4096;
5493 return (code
== CONST_INT
5494 && INTVAL (index
) < range
5495 && INTVAL (index
) > -range
);
5498 /* Return true if OP is a valid index scaling factor for Thumb-2 address
5499 index operand. i.e. 1, 2, 4 or 8. */
5501 thumb2_index_mul_operand (rtx op
)
5505 if (GET_CODE(op
) != CONST_INT
)
5509 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
5512 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
5514 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
5516 enum rtx_code code
= GET_CODE (index
);
5518 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5519 /* Standard coprocessor addressing modes. */
5520 if (TARGET_HARD_FLOAT
5521 && (TARGET_FPA
|| TARGET_MAVERICK
)
5522 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
5523 || (TARGET_MAVERICK
&& mode
== DImode
)))
5524 return (code
== CONST_INT
&& INTVAL (index
) < 1024
5525 && INTVAL (index
) > -1024
5526 && (INTVAL (index
) & 3) == 0);
5528 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
5530 /* For DImode assume values will usually live in core regs
5531 and only allow LDRD addressing modes. */
5532 if (!TARGET_LDRD
|| mode
!= DImode
)
5533 return (code
== CONST_INT
5534 && INTVAL (index
) < 1024
5535 && INTVAL (index
) > -1024
5536 && (INTVAL (index
) & 3) == 0);
5540 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
5541 return (code
== CONST_INT
5542 && INTVAL (index
) < 1016
5543 && INTVAL (index
) > -1024
5544 && (INTVAL (index
) & 3) == 0);
5546 if (arm_address_register_rtx_p (index
, strict_p
)
5547 && (GET_MODE_SIZE (mode
) <= 4))
5550 if (mode
== DImode
|| mode
== DFmode
)
5552 if (code
== CONST_INT
)
5554 HOST_WIDE_INT val
= INTVAL (index
);
5555 /* ??? Can we assume ldrd for thumb2? */
5556 /* Thumb-2 ldrd only has reg+const addressing modes. */
5557 /* ldrd supports offsets of +-1020.
5558 However the ldr fallback does not. */
5559 return val
> -256 && val
< 256 && (val
& 3) == 0;
5567 rtx xiop0
= XEXP (index
, 0);
5568 rtx xiop1
= XEXP (index
, 1);
5570 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
5571 && thumb2_index_mul_operand (xiop1
))
5572 || (arm_address_register_rtx_p (xiop1
, strict_p
)
5573 && thumb2_index_mul_operand (xiop0
)));
5575 else if (code
== ASHIFT
)
5577 rtx op
= XEXP (index
, 1);
5579 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
5580 && GET_CODE (op
) == CONST_INT
5582 && INTVAL (op
) <= 3);
5585 return (code
== CONST_INT
5586 && INTVAL (index
) < 4096
5587 && INTVAL (index
) > -256);
5590 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
5592 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
5596 if (GET_CODE (x
) != REG
)
5602 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
5604 return (regno
<= LAST_LO_REGNUM
5605 || regno
> LAST_VIRTUAL_REGISTER
5606 || regno
== FRAME_POINTER_REGNUM
5607 || (GET_MODE_SIZE (mode
) >= 4
5608 && (regno
== STACK_POINTER_REGNUM
5609 || regno
>= FIRST_PSEUDO_REGISTER
5610 || x
== hard_frame_pointer_rtx
5611 || x
== arg_pointer_rtx
)));
5614 /* Return nonzero if x is a legitimate index register. This is the case
5615 for any base register that can access a QImode object. */
5617 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
5619 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
5622 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
5624 The AP may be eliminated to either the SP or the FP, so we use the
5625 least common denominator, e.g. SImode, and offsets from 0 to 64.
5627 ??? Verify whether the above is the right approach.
5629 ??? Also, the FP may be eliminated to the SP, so perhaps that
5630 needs special handling also.
5632 ??? Look at how the mips16 port solves this problem. It probably uses
5633 better ways to solve some of these problems.
5635 Although it is not incorrect, we don't accept QImode and HImode
5636 addresses based on the frame pointer or arg pointer until the
5637 reload pass starts. This is so that eliminating such addresses
5638 into stack based ones won't produce impossible code. */
5640 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
5642 /* ??? Not clear if this is right. Experiment. */
5643 if (GET_MODE_SIZE (mode
) < 4
5644 && !(reload_in_progress
|| reload_completed
)
5645 && (reg_mentioned_p (frame_pointer_rtx
, x
)
5646 || reg_mentioned_p (arg_pointer_rtx
, x
)
5647 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
5648 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
5649 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
5650 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
5653 /* Accept any base register. SP only in SImode or larger. */
5654 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
5657 /* This is PC relative data before arm_reorg runs. */
5658 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
5659 && GET_CODE (x
) == SYMBOL_REF
5660 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
5663 /* This is PC relative data after arm_reorg runs. */
5664 else if ((GET_MODE_SIZE (mode
) >= 4 || mode
== HFmode
)
5666 && (GET_CODE (x
) == LABEL_REF
5667 || (GET_CODE (x
) == CONST
5668 && GET_CODE (XEXP (x
, 0)) == PLUS
5669 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
5670 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
5673 /* Post-inc indexing only supported for SImode and larger. */
5674 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
5675 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
5678 else if (GET_CODE (x
) == PLUS
)
5680 /* REG+REG address can be any two index registers. */
5681 /* We disallow FRAME+REG addressing since we know that FRAME
5682 will be replaced with STACK, and SP relative addressing only
5683 permits SP+OFFSET. */
5684 if (GET_MODE_SIZE (mode
) <= 4
5685 && XEXP (x
, 0) != frame_pointer_rtx
5686 && XEXP (x
, 1) != frame_pointer_rtx
5687 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5688 && thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
))
5691 /* REG+const has 5-7 bit offset for non-SP registers. */
5692 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
5693 || XEXP (x
, 0) == arg_pointer_rtx
)
5694 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5695 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
5698 /* REG+const has 10-bit offset for SP, but only SImode and
5699 larger is supported. */
5700 /* ??? Should probably check for DI/DFmode overflow here
5701 just like GO_IF_LEGITIMATE_OFFSET does. */
5702 else if (GET_CODE (XEXP (x
, 0)) == REG
5703 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
5704 && GET_MODE_SIZE (mode
) >= 4
5705 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5706 && INTVAL (XEXP (x
, 1)) >= 0
5707 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
5708 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5711 else if (GET_CODE (XEXP (x
, 0)) == REG
5712 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
5713 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
5714 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
5715 && REGNO (XEXP (x
, 0)) <= LAST_VIRTUAL_REGISTER
))
5716 && GET_MODE_SIZE (mode
) >= 4
5717 && GET_CODE (XEXP (x
, 1)) == CONST_INT
5718 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
5722 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
5723 && GET_MODE_SIZE (mode
) == 4
5724 && GET_CODE (x
) == SYMBOL_REF
5725 && CONSTANT_POOL_ADDRESS_P (x
)
5727 && symbol_mentioned_p (get_pool_constant (x
))
5728 && ! pcrel_constant_p (get_pool_constant (x
))))
/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */

thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)

  switch (GET_MODE_SIZE (mode))

      return val >= 0 && val < 32;

      return val >= 0 && val < 64 && (val & 1) == 0;

      && (val + GET_MODE_SIZE (mode)) <= 128
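/* These ranges correspond to the Thumb-1 immediate-offset encodings:
   ldrb/strb take a 5-bit byte offset (0..31), ldrh/strh a 5-bit halfword
   offset (0..62, even), and ldr/str a 5-bit word offset (0..124,
   word-aligned).  */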
arm_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)

    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

get_tls_get_addr (void)

  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
arm_load_tp (rtx target)

    target = gen_reg_rtx (SImode);

      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));

      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, 0);
      emit_move_insn (target, tmp);
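  /* For reference: on targets with the hardware thread register,
     load_tp_hard reads TPIDRURO via "mrc p15, 0, <reg>, c13, c0, 3";
     the soft variant instead calls the __aeabi_read_tp helper, which
     returns the thread pointer in r0 -- hence the copy out of r0 above.  */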
5805 load_tls_operand (rtx x
, rtx reg
)
5809 if (reg
== NULL_RTX
)
5810 reg
= gen_reg_rtx (SImode
);
5812 tmp
= gen_rtx_CONST (SImode
, x
);
5814 emit_move_insn (reg
, tmp
);
5820 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
5822 rtx insns
, label
, labelno
, sum
;
5826 labelno
= GEN_INT (pic_labelno
++);
5827 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5828 label
= gen_rtx_CONST (VOIDmode
, label
);
5830 sum
= gen_rtx_UNSPEC (Pmode
,
5831 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
5832 GEN_INT (TARGET_ARM
? 8 : 4)),
5834 reg
= load_tls_operand (sum
, reg
);
5837 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
5838 else if (TARGET_THUMB2
)
5839 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5840 else /* TARGET_THUMB1 */
5841 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5843 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
, LCT_PURE
, /* LCT_CONST? */
5844 Pmode
, 1, reg
, Pmode
);
5846 insns
= get_insns ();
5853 legitimize_tls_address (rtx x
, rtx reg
)
5855 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
5856 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
5860 case TLS_MODEL_GLOBAL_DYNAMIC
:
5861 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
5862 dest
= gen_reg_rtx (Pmode
);
5863 emit_libcall_block (insns
, dest
, ret
, x
);
5866 case TLS_MODEL_LOCAL_DYNAMIC
:
5867 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
5869 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
5870 share the LDM result with other LD model accesses. */
5871 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
5873 dest
= gen_reg_rtx (Pmode
);
5874 emit_libcall_block (insns
, dest
, ret
, eqv
);
5876 /* Load the addend. */
5877 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
, GEN_INT (TLS_LDO32
)),
5879 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
5880 return gen_rtx_PLUS (Pmode
, dest
, addend
);
5882 case TLS_MODEL_INITIAL_EXEC
:
5883 labelno
= GEN_INT (pic_labelno
++);
5884 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
5885 label
= gen_rtx_CONST (VOIDmode
, label
);
5886 sum
= gen_rtx_UNSPEC (Pmode
,
5887 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
5888 GEN_INT (TARGET_ARM
? 8 : 4)),
5890 reg
= load_tls_operand (sum
, reg
);
5893 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
5894 else if (TARGET_THUMB2
)
5895 emit_insn (gen_tls_load_dot_plus_four (reg
, NULL
, reg
, labelno
));
5898 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
5899 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
5902 tp
= arm_load_tp (NULL_RTX
);
5904 return gen_rtx_PLUS (Pmode
, tp
, reg
);
5906 case TLS_MODEL_LOCAL_EXEC
:
5907 tp
= arm_load_tp (NULL_RTX
);
5909 reg
= gen_rtx_UNSPEC (Pmode
,
5910 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
5912 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
5914 return gen_rtx_PLUS (Pmode
, tp
, reg
);
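      /* Summary of the access models handled above (illustrative; standard
         ELF TLS on ARM):
           global dynamic: TLS_GD32 operand, resolved by a __tls_get_addr call
           local dynamic : TLS_LDM32 module base plus a TLS_LDO32 addend
           initial exec  : TLS_IE32 GOT slot added to the thread pointer
           local exec    : TLS_LE32 constant offset from the thread pointer  */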
5921 /* Try machine-dependent ways of modifying an illegitimate address
5922 to be legitimate. If we find one, return the new, valid address. */
5924 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
5928 /* TODO: legitimize_address for Thumb2. */
5931 return thumb_legitimize_address (x
, orig_x
, mode
);
5934 if (arm_tls_symbol_p (x
))
5935 return legitimize_tls_address (x
, NULL_RTX
);
5937 if (GET_CODE (x
) == PLUS
)
5939 rtx xop0
= XEXP (x
, 0);
5940 rtx xop1
= XEXP (x
, 1);
5942 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
5943 xop0
= force_reg (SImode
, xop0
);
5945 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
5946 xop1
= force_reg (SImode
, xop1
);
5948 if (ARM_BASE_REGISTER_RTX_P (xop0
)
5949 && GET_CODE (xop1
) == CONST_INT
)
5951 HOST_WIDE_INT n
, low_n
;
5955 /* VFP addressing modes actually allow greater offsets, but for
5956 now we just stick with the lowest common denominator. */
5958 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
5970 low_n
= ((mode
) == TImode
? 0
5971 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
5975 base_reg
= gen_reg_rtx (SImode
);
5976 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
5977 emit_move_insn (base_reg
, val
);
5978 x
= plus_constant (base_reg
, low_n
);
5980 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
5981 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
5984 /* XXX We don't allow MINUS any more -- see comment in
5985 arm_legitimate_address_outer_p (). */
5986 else if (GET_CODE (x
) == MINUS
)
5988 rtx xop0
= XEXP (x
, 0);
5989 rtx xop1
= XEXP (x
, 1);
5991 if (CONSTANT_P (xop0
))
5992 xop0
= force_reg (SImode
, xop0
);
5994 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
5995 xop1
= force_reg (SImode
, xop1
);
5997 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
5998 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses, which often allows the base register to
     be factored out for multiple adjacent memory references, and might
     even allow the mini pool to be avoided entirely.  */
6005 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
6008 HOST_WIDE_INT mask
, base
, index
;
      /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
	 use an 8-bit index.  So let's use a 12-bit index for SImode only and
	 hope that arm_gen_constant will enable ldrb to use more bits.  */
6014 bits
= (mode
== SImode
) ? 12 : 8;
6015 mask
= (1 << bits
) - 1;
6016 base
= INTVAL (x
) & ~mask
;
6017 index
= INTVAL (x
) & mask
;
6018 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
6020 /* It'll most probably be more efficient to generate the base
6021 with more bits set and use a negative index instead. */
6025 base_reg
= force_reg (SImode
, GEN_INT (base
));
6026 x
= plus_constant (base_reg
, index
);
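	  /* For example, an SImode access to absolute address 0x3fffc is
	     split into base = 0x3f000 (a single MOV immediate) plus a
	     12-bit index of 0xffc, so neighbouring accesses can share the
	     base register.  */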
6031 /* We need to find and carefully transform any SYMBOL and LABEL
6032 references; so go back to the original address expression. */
6033 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6035 if (new_x
!= orig_x
)
6043 /* Try machine-dependent ways of modifying an illegitimate Thumb address
6044 to be legitimate. If we find one, return the new, valid address. */
6046 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
6048 if (arm_tls_symbol_p (x
))
6049 return legitimize_tls_address (x
, NULL_RTX
);
6051 if (GET_CODE (x
) == PLUS
6052 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6053 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
6054 || INTVAL (XEXP (x
, 1)) < 0))
6056 rtx xop0
= XEXP (x
, 0);
6057 rtx xop1
= XEXP (x
, 1);
6058 HOST_WIDE_INT offset
= INTVAL (xop1
);
6060 /* Try and fold the offset into a biasing of the base register and
6061 then offsetting that. Don't do this when optimizing for space
6062 since it can cause too many CSEs. */
6063 if (optimize_size
&& offset
>= 0
6064 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
6066 HOST_WIDE_INT delta
;
6069 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
6070 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
6071 delta
= 31 * GET_MODE_SIZE (mode
);
6073 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
6075 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
6077 x
= plus_constant (xop0
, delta
);
6079 else if (offset
< 0 && offset
> -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, since forcing these into a register normally takes two
6083 x
= force_operand (x
, NULL_RTX
);
6086 /* For the remaining cases, force the constant into a register. */
6087 xop1
= force_reg (SImode
, xop1
);
6088 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
6091 else if (GET_CODE (x
) == PLUS
6092 && s_register_operand (XEXP (x
, 1), SImode
)
6093 && !s_register_operand (XEXP (x
, 0), SImode
))
6095 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
6097 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
6102 /* We need to find and carefully transform any SYMBOL and LABEL
6103 references; so go back to the original address expression. */
6104 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
6106 if (new_x
!= orig_x
)
6114 thumb_legitimize_reload_address (rtx
*x_p
,
6115 enum machine_mode mode
,
6116 int opnum
, int type
,
6117 int ind_levels ATTRIBUTE_UNUSED
)
6121 if (GET_CODE (x
) == PLUS
6122 && GET_MODE_SIZE (mode
) < 4
6123 && REG_P (XEXP (x
, 0))
6124 && XEXP (x
, 0) == stack_pointer_rtx
6125 && GET_CODE (XEXP (x
, 1)) == CONST_INT
6126 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
6131 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6132 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6136 /* If both registers are hi-regs, then it's better to reload the
6137 entire expression rather than each register individually. That
6138 only requires one reload register rather than two. */
6139 if (GET_CODE (x
) == PLUS
6140 && REG_P (XEXP (x
, 0))
6141 && REG_P (XEXP (x
, 1))
6142 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
6143 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
6148 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
6149 Pmode
, VOIDmode
, 0, 0, opnum
, (enum reload_type
) type
);
6156 /* Test for various thread-local symbols. */
6158 /* Return TRUE if X is a thread-local symbol. */
6161 arm_tls_symbol_p (rtx x
)
6163 if (! TARGET_HAVE_TLS
)
6166 if (GET_CODE (x
) != SYMBOL_REF
)
6169 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6172 /* Helper for arm_tls_referenced_p. */
6175 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
6177 if (GET_CODE (*x
) == SYMBOL_REF
)
6178 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
6180 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
6181 TLS offsets, not real symbol references. */
6182 if (GET_CODE (*x
) == UNSPEC
6183 && XINT (*x
, 1) == UNSPEC_TLS
)
6189 /* Return TRUE if X contains any TLS symbol references. */
6192 arm_tls_referenced_p (rtx x
)
6194 if (! TARGET_HAVE_TLS
)
6197 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
6200 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
6203 arm_cannot_force_const_mem (rtx x
)
6207 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
6209 split_const (x
, &base
, &offset
);
6210 if (GET_CODE (base
) == SYMBOL_REF
6211 && !offset_within_block_p (base
, INTVAL (offset
)))
6214 return arm_tls_referenced_p (x
);
6217 #define REG_OR_SUBREG_REG(X) \
6218 (GET_CODE (X) == REG \
6219 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6221 #define REG_OR_SUBREG_RTX(X) \
6222 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6224 #ifndef COSTS_N_INSNS
6225 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6228 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6230 enum machine_mode mode
= GET_MODE (x
);
6243 return COSTS_N_INSNS (1);
6246 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
6249 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
6256 return COSTS_N_INSNS (2) + cycles
;
6258 return COSTS_N_INSNS (1) + 16;
6261 return (COSTS_N_INSNS (1)
6262 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6263 + GET_CODE (SET_DEST (x
)) == MEM
));
6268 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6270 if (thumb_shiftable_const (INTVAL (x
)))
6271 return COSTS_N_INSNS (2);
6272 return COSTS_N_INSNS (3);
6274 else if ((outer
== PLUS
|| outer
== COMPARE
)
6275 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6277 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6278 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6279 return COSTS_N_INSNS (1);
6280 else if (outer
== AND
)
6283 /* This duplicates the tests in the andsi3 expander. */
6284 for (i
= 9; i
<= 31; i
++)
6285 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6286 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6287 return COSTS_N_INSNS (2);
6289 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6290 || outer
== LSHIFTRT
)
6292 return COSTS_N_INSNS (2);
6298 return COSTS_N_INSNS (3);
6316 /* XXX another guess. */
6317 /* Memory costs quite a lot for the first word, but subsequent words
6318 load at the equivalent of a single insn each. */
6319 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
6320 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
6325 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6330 /* XXX still guessing. */
6331 switch (GET_MODE (XEXP (x
, 0)))
6334 return (1 + (mode
== DImode
? 4 : 0)
6335 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
6338 return (4 + (mode
== DImode
? 4 : 0)
6339 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
6342 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
6354 arm_rtx_costs_1 (rtx x
, enum rtx_code outer
, int* total
, bool speed
)
6356 enum machine_mode mode
= GET_MODE (x
);
6357 enum rtx_code subcode
;
6359 enum rtx_code code
= GET_CODE (x
);
6365 /* Memory costs quite a lot for the first word, but subsequent words
6366 load at the equivalent of a single insn each. */
6367 *total
= COSTS_N_INSNS (2 + ARM_NUM_REGS (mode
));
6374 if (TARGET_HARD_FLOAT
&& mode
== SFmode
)
6375 *total
= COSTS_N_INSNS (2);
6376 else if (TARGET_HARD_FLOAT
&& mode
== DFmode
&& !TARGET_VFP_SINGLE
)
6377 *total
= COSTS_N_INSNS (4);
6379 *total
= COSTS_N_INSNS (20);
6383 if (GET_CODE (XEXP (x
, 1)) == REG
)
6384 *total
= COSTS_N_INSNS (1); /* Need to subtract from 32 */
6385 else if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6386 *total
= rtx_cost (XEXP (x
, 1), code
, speed
);
6392 *total
+= COSTS_N_INSNS (4);
6397 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
6398 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6401 *total
+= COSTS_N_INSNS (3);
6405 *total
+= COSTS_N_INSNS (1);
6406 /* Increase the cost of complex shifts because they aren't any faster,
6407 and reduce dual issue opportunities. */
6408 if (arm_tune_cortex_a9
6409 && outer
!= SET
&& GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6417 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6419 if (TARGET_HARD_FLOAT
&& (mode
== SFmode
|| mode
== DFmode
))
6420 *total
= COSTS_N_INSNS (1);
6422 *total
= COSTS_N_INSNS (20);
6425 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
6426 /* Thumb2 does not have RSB, so all arguments must be
6427 registers (subtracting a constant is canonicalized as
6428 addition of the negated constant). */
6434 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
6435 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6436 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6438 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6442 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6443 && const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6445 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6452 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6454 if (TARGET_HARD_FLOAT
6456 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6458 *total
= COSTS_N_INSNS (1);
6459 if (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
6460 && arm_const_double_rtx (XEXP (x
, 0)))
6462 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6466 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6467 && arm_const_double_rtx (XEXP (x
, 1)))
6469 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6475 *total
= COSTS_N_INSNS (20);
6479 *total
= COSTS_N_INSNS (1);
6480 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
6481 && const_ok_for_arm (INTVAL (XEXP (x
, 0))))
6483 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6487 subcode
= GET_CODE (XEXP (x
, 1));
6488 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6489 || subcode
== LSHIFTRT
6490 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6492 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6493 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6497 /* A shift as a part of RSB costs no more than RSB itself. */
6498 if (GET_CODE (XEXP (x
, 0)) == MULT
6499 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6501 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, speed
);
6502 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6507 && power_of_two_operand (XEXP (XEXP (x
, 1), 1), SImode
))
6509 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6510 *total
+= rtx_cost (XEXP (XEXP (x
, 1), 0), subcode
, speed
);
6514 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMPARE
6515 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 1))) == RTX_COMM_COMPARE
)
6517 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6518 if (GET_CODE (XEXP (XEXP (x
, 1), 0)) == REG
6519 && REGNO (XEXP (XEXP (x
, 1), 0)) != CC_REGNUM
)
6520 *total
+= COSTS_N_INSNS (1);
6528 if (code
== PLUS
&& arm_arch6
&& mode
== SImode
6529 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
6530 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
6532 *total
= COSTS_N_INSNS (1);
6533 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), GET_CODE (XEXP (x
, 0)),
6535 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
      /* MLA: All arguments must be registers.  We filter out
         multiplication by a power of two, so that we fall down into
         the code below.  */
6542 if (GET_CODE (XEXP (x
, 0)) == MULT
6543 && !power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6545 /* The cost comes from the cost of the multiply. */
6549 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6551 if (TARGET_HARD_FLOAT
6553 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6555 *total
= COSTS_N_INSNS (1);
6556 if (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
6557 && arm_const_double_rtx (XEXP (x
, 1)))
6559 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6566 *total
= COSTS_N_INSNS (20);
6570 if (GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMPARE
6571 || GET_RTX_CLASS (GET_CODE (XEXP (x
, 0))) == RTX_COMM_COMPARE
)
6573 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 1), code
, speed
);
6574 if (GET_CODE (XEXP (XEXP (x
, 0), 0)) == REG
6575 && REGNO (XEXP (XEXP (x
, 0), 0)) != CC_REGNUM
)
6576 *total
+= COSTS_N_INSNS (1);
6582 case AND
: case XOR
: case IOR
:
      /* Normally the frame registers will be spilled into reg+const during
         reload, so it is a bad idea to combine them with other instructions,
         since then they might not be moved outside of loops.  As a compromise
         we allow integration with ops that have a constant as their second
         operand.  */
6589 if ((REG_OR_SUBREG_REG (XEXP (x
, 0))
6590 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
6591 && GET_CODE (XEXP (x
, 1)) != CONST_INT
)
6592 || (REG_OR_SUBREG_REG (XEXP (x
, 0))
6593 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))))
6598 *total
+= COSTS_N_INSNS (2);
6599 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6600 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6602 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6609 *total
+= COSTS_N_INSNS (1);
6610 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6611 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6613 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6616 subcode
= GET_CODE (XEXP (x
, 0));
6617 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6618 || subcode
== LSHIFTRT
6619 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6621 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6622 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6627 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6629 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6630 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6634 if (subcode
== UMIN
|| subcode
== UMAX
6635 || subcode
== SMIN
|| subcode
== SMAX
)
6637 *total
= COSTS_N_INSNS (3);
6644 /* This should have been handled by the CPU specific routines. */
6648 if (arm_arch3m
&& mode
== SImode
6649 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
6650 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6651 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
6652 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
6653 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
6654 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
6656 *total
= rtx_cost (XEXP (XEXP (x
, 0), 0), LSHIFTRT
, speed
);
6659 *total
= COSTS_N_INSNS (2); /* Plus the cost of the MULT */
6663 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6665 if (TARGET_HARD_FLOAT
6667 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6669 *total
= COSTS_N_INSNS (1);
6672 *total
= COSTS_N_INSNS (2);
6678 *total
= COSTS_N_INSNS (ARM_NUM_REGS(mode
));
6679 if (mode
== SImode
&& code
== NOT
)
6681 subcode
= GET_CODE (XEXP (x
, 0));
6682 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6683 || subcode
== LSHIFTRT
6684 || subcode
== ROTATE
|| subcode
== ROTATERT
6686 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
)))
6688 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6689 /* Register shifts cost an extra cycle. */
6690 if (GET_CODE (XEXP (XEXP (x
, 0), 1)) != CONST_INT
)
6691 *total
+= COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x
, 0), 1),
6700 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
6702 *total
= COSTS_N_INSNS (4);
6706 operand
= XEXP (x
, 0);
6708 if (!((GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMPARE
6709 || GET_RTX_CLASS (GET_CODE (operand
)) == RTX_COMM_COMPARE
)
6710 && GET_CODE (XEXP (operand
, 0)) == REG
6711 && REGNO (XEXP (operand
, 0)) == CC_REGNUM
))
6712 *total
+= COSTS_N_INSNS (1);
6713 *total
+= (rtx_cost (XEXP (x
, 1), code
, speed
)
6714 + rtx_cost (XEXP (x
, 2), code
, speed
));
6718 if (mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6720 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6726 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6727 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6729 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6735 if ((GET_CODE (XEXP (x
, 0)) != REG
|| REGNO (XEXP (x
, 0)) != CC_REGNUM
)
6736 && mode
== SImode
&& XEXP (x
, 1) == const0_rtx
)
6738 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6758 /* SCC insns. In the case where the comparison has already been
6759 performed, then they cost 2 instructions. Otherwise they need
6760 an additional comparison before them. */
6761 *total
= COSTS_N_INSNS (2);
6762 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
6769 if (GET_CODE (XEXP (x
, 0)) == REG
&& REGNO (XEXP (x
, 0)) == CC_REGNUM
)
6775 *total
+= COSTS_N_INSNS (1);
6776 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
6777 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
))
6779 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6783 subcode
= GET_CODE (XEXP (x
, 0));
6784 if (subcode
== ASHIFT
|| subcode
== ASHIFTRT
6785 || subcode
== LSHIFTRT
6786 || subcode
== ROTATE
|| subcode
== ROTATERT
)
6788 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6789 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6794 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
6796 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6797 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), subcode
, speed
);
6807 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, speed
);
6808 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
6809 || !const_ok_for_arm (INTVAL (XEXP (x
, 1))))
6810 *total
+= rtx_cost (XEXP (x
, 1), code
, speed
);
6814 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6816 if (TARGET_HARD_FLOAT
6818 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
6820 *total
= COSTS_N_INSNS (1);
6823 *total
= COSTS_N_INSNS (20);
6826 *total
= COSTS_N_INSNS (1);
6828 *total
+= COSTS_N_INSNS (3);
6832 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6836 *total
+= COSTS_N_INSNS (1);
6838 if (GET_MODE (XEXP (x
, 0)) != SImode
)
6842 if (GET_CODE (XEXP (x
, 0)) != MEM
)
6843 *total
+= COSTS_N_INSNS (1);
6845 else if (!arm_arch4
|| GET_CODE (XEXP (x
, 0)) != MEM
)
6846 *total
+= COSTS_N_INSNS (2);
6855 if (GET_MODE_CLASS (mode
) == MODE_INT
)
6858 *total
+= COSTS_N_INSNS (1);
6860 if (GET_MODE (XEXP (x
, 0)) != SImode
)
6864 if (GET_CODE (XEXP (x
, 0)) != MEM
)
6865 *total
+= COSTS_N_INSNS (1);
6867 else if (!arm_arch4
|| GET_CODE (XEXP (x
, 0)) != MEM
)
6868 *total
+= COSTS_N_INSNS (GET_MODE (XEXP (x
, 0)) == QImode
?
6875 switch (GET_MODE (XEXP (x
, 0)))
6882 *total
= COSTS_N_INSNS (1);
6892 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, speed
);
6896 if (const_ok_for_arm (INTVAL (x
))
6897 || const_ok_for_arm (~INTVAL (x
)))
6898 *total
= COSTS_N_INSNS (1);
6900 *total
= COSTS_N_INSNS (arm_gen_constant (SET
, mode
, NULL_RTX
,
6901 INTVAL (x
), NULL_RTX
,
6908 *total
= COSTS_N_INSNS (3);
6912 *total
= COSTS_N_INSNS (1);
6916 *total
= COSTS_N_INSNS (1);
6917 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
6921 if (TARGET_HARD_FLOAT
&& vfp3_const_double_rtx (x
)
6922 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
6923 *total
= COSTS_N_INSNS (1);
6925 *total
= COSTS_N_INSNS (4);
6929 *total
= COSTS_N_INSNS (4);
/* Estimates the size cost of Thumb-1 instructions.
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grained tuning when we have more related test cases.  */
6938 thumb1_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
6940 enum machine_mode mode
= GET_MODE (x
);
6953 return COSTS_N_INSNS (1);
6956 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
          /* The Thumb-1 mul instruction cannot operate on a constant; we
             must load the constant into a register first.  */
6960 int const_size
= thumb1_size_rtx_costs (XEXP (x
, 1), CONST_INT
, SET
);
6961 return COSTS_N_INSNS (1) + const_size
;
6963 return COSTS_N_INSNS (1);
6966 return (COSTS_N_INSNS (1)
6967 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
6968 + GET_CODE (SET_DEST (x
)) == MEM
));
6973 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
6975 if (thumb_shiftable_const (INTVAL (x
)))
6976 return COSTS_N_INSNS (2);
6977 return COSTS_N_INSNS (3);
6979 else if ((outer
== PLUS
|| outer
== COMPARE
)
6980 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
6982 else if ((outer
== IOR
|| outer
== XOR
|| outer
== AND
)
6983 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
6984 return COSTS_N_INSNS (1);
6985 else if (outer
== AND
)
6988 /* This duplicates the tests in the andsi3 expander. */
6989 for (i
= 9; i
<= 31; i
++)
6990 if ((((HOST_WIDE_INT
) 1) << i
) - 1 == INTVAL (x
)
6991 || (((HOST_WIDE_INT
) 1) << i
) - 1 == ~INTVAL (x
))
6992 return COSTS_N_INSNS (2);
6994 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
6995 || outer
== LSHIFTRT
)
6997 return COSTS_N_INSNS (2);
7003 return COSTS_N_INSNS (3);
7021 /* XXX another guess. */
7022 /* Memory costs quite a lot for the first word, but subsequent words
7023 load at the equivalent of a single insn each. */
7024 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
7025 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
7030 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
7035 /* XXX still guessing. */
7036 switch (GET_MODE (XEXP (x
, 0)))
7039 return (1 + (mode
== DImode
? 4 : 0)
7040 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7043 return (4 + (mode
== DImode
? 4 : 0)
7044 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7047 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
7058 /* RTX costs when optimizing for size. */
7060 arm_size_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7063 enum machine_mode mode
= GET_MODE (x
);
7066 *total
= thumb1_size_rtx_costs (x
, code
, outer_code
);
7070 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
7074 /* A memory access costs 1 insn if the mode is small, or the address is
7075 a single register, otherwise it costs one insn per word. */
7076 if (REG_P (XEXP (x
, 0)))
7077 *total
= COSTS_N_INSNS (1);
7079 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7086 /* Needs a libcall, so it costs about this. */
7087 *total
= COSTS_N_INSNS (2);
7091 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
7093 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
, false);
7101 if (mode
== DImode
&& GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7103 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
, false);
7106 else if (mode
== SImode
)
7108 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
, false);
7109 /* Slightly disparage register shifts, but not by much. */
7110 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
7111 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
, false);
7115 /* Needs a libcall. */
7116 *total
= COSTS_N_INSNS (2);
7120 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7121 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7123 *total
= COSTS_N_INSNS (1);
7129 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
7130 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
7132 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
7133 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
7134 || subcode1
== ROTATE
|| subcode1
== ROTATERT
7135 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
7136 || subcode1
== ASHIFTRT
)
7138 /* It's just the cost of the two operands. */
7143 *total
= COSTS_N_INSNS (1);
7147 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7151 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7152 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7154 *total
= COSTS_N_INSNS (1);
7158 /* A shift as a part of ADD costs nothing. */
7159 if (GET_CODE (XEXP (x
, 0)) == MULT
7160 && power_of_two_operand (XEXP (XEXP (x
, 0), 1), SImode
))
7162 *total
= COSTS_N_INSNS (TARGET_THUMB2
? 2 : 1);
7163 *total
+= rtx_cost (XEXP (XEXP (x
, 0), 0), code
, false);
7164 *total
+= rtx_cost (XEXP (x
, 1), code
, false);
7169 case AND
: case XOR
: case IOR
:
7172 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
7174 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
7175 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
7176 || (code
== AND
&& subcode
== NOT
))
7178 /* It's just the cost of the two operands. */
7184 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7188 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7192 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7193 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7195 *total
= COSTS_N_INSNS (1);
7201 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
7210 if (cc_register (XEXP (x
, 0), VOIDmode
))
7213 *total
= COSTS_N_INSNS (1);
7217 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
7218 && (mode
== SFmode
|| !TARGET_VFP_SINGLE
))
7219 *total
= COSTS_N_INSNS (1);
7221 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
7226 if (GET_MODE_SIZE (GET_MODE (XEXP (x
, 0))) < 4)
7228 if (!(arm_arch4
&& MEM_P (XEXP (x
, 0))))
7229 *total
+= COSTS_N_INSNS (arm_arch6
? 1 : 2);
7232 *total
+= COSTS_N_INSNS (1);
7237 if (!(arm_arch4
&& MEM_P (XEXP (x
, 0))))
7239 switch (GET_MODE (XEXP (x
, 0)))
7242 *total
+= COSTS_N_INSNS (1);
7246 *total
+= COSTS_N_INSNS (arm_arch6
? 1 : 2);
7252 *total
+= COSTS_N_INSNS (2);
7257 *total
+= COSTS_N_INSNS (1);
7262 if (const_ok_for_arm (INTVAL (x
)))
7263 /* A multiplication by a constant requires another instruction
7264 to load the constant to a register. */
7265 *total
= COSTS_N_INSNS ((outer_code
== SET
|| outer_code
== MULT
)
7267 else if (const_ok_for_arm (~INTVAL (x
)))
7268 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
7269 else if (const_ok_for_arm (-INTVAL (x
)))
7271 if (outer_code
== COMPARE
|| outer_code
== PLUS
7272 || outer_code
== MINUS
)
7275 *total
= COSTS_N_INSNS (1);
7278 *total
= COSTS_N_INSNS (2);
7284 *total
= COSTS_N_INSNS (2);
7288 *total
= COSTS_N_INSNS (4);
7293 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
7294 cost of these slightly. */
7295 *total
= COSTS_N_INSNS (1) + 1;
7299 if (mode
!= VOIDmode
)
7300 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
        *total = COSTS_N_INSNS (4); /* Who knows?  */
/* RTX costs.  Dispatch to the size-oriented or the tuning-specific
   speed-oriented cost function as appropriate.  */
7309 arm_rtx_costs (rtx x
, int code
, int outer_code
, int *total
,
7313 return arm_size_rtx_costs (x
, (enum rtx_code
) code
,
7314 (enum rtx_code
) outer_code
, total
);
7316 return current_tune
->rtx_costs (x
, (enum rtx_code
) code
,
7317 (enum rtx_code
) outer_code
,
7321 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7322 supported on any "slowmul" cores, so it can be ignored. */
7325 arm_slowmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7326 int *total
, bool speed
)
7328 enum machine_mode mode
= GET_MODE (x
);
7332 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7339 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
7342 *total
= COSTS_N_INSNS (20);
7346 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7348 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7349 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7350 int cost
, const_ok
= const_ok_for_arm (i
);
7351 int j
, booth_unit_size
;
7353 /* Tune as appropriate. */
7354 cost
= const_ok
? 4 : 8;
7355 booth_unit_size
= 2;
7356 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7358 i
>>= booth_unit_size
;
7362 *total
= COSTS_N_INSNS (cost
);
7363 *total
+= rtx_cost (XEXP (x
, 0), code
, speed
);
7367 *total
= COSTS_N_INSNS (20);
      return arm_rtx_costs_1 (x, outer_code, total, speed);
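/* Editorial note (not part of the original source): in the multiply case
   above, the estimated cost starts at 4 (encodable constant) or 8 and then
   grows as the 32-bit multiplier is shifted right two bits at a time until
   it becomes zero, on the assumption that the loop body bumps COST once per
   step as in the original source.  For example, a multiplier of 0x65
   (binary 110 0101) needs four such steps, while 0x3 needs only one.  The
   helper below is a hypothetical restatement of that estimate, for
   illustration only.  */
#if 0
static int
slowmul_const_cost_example (unsigned HOST_WIDE_INT i, int const_ok)
{
  int cost = const_ok ? 4 : 8;		/* Assumed starting points.  */
  int j, booth_unit_size = 2;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
  for (j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      cost++;
    }
  return cost;				/* Fed to COSTS_N_INSNS above.  */
}
#endif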
7376 /* RTX cost for cores with a fast multiply unit (M variants). */
7379 arm_fastmul_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7380 int *total
, bool speed
)
7382 enum machine_mode mode
= GET_MODE (x
);
7386 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7390 /* ??? should thumb2 use different costs? */
7394 /* There is no point basing this on the tuning, since it is always the
7395 fast variant if it exists at all. */
7397 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7398 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7399 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7401 *total
= COSTS_N_INSNS(2);
7408 *total
= COSTS_N_INSNS (5);
7412 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7414 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
7415 & (unsigned HOST_WIDE_INT
) 0xffffffff);
7416 int cost
, const_ok
= const_ok_for_arm (i
);
7417 int j
, booth_unit_size
;
7419 /* Tune as appropriate. */
7420 cost
= const_ok
? 4 : 8;
7421 booth_unit_size
= 8;
7422 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
7424 i
>>= booth_unit_size
;
7428 *total
= COSTS_N_INSNS(cost
);
7434 *total
= COSTS_N_INSNS (4);
7438 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7440 if (TARGET_HARD_FLOAT
7442 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7444 *total
= COSTS_N_INSNS (1);
7449 /* Requires a lib call */
7450 *total
= COSTS_N_INSNS (20);
7454 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7459 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7460 so it can be ignored. */
7463 arm_xscale_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7464 int *total
, bool speed
)
7466 enum machine_mode mode
= GET_MODE (x
);
7470 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7477 if (GET_CODE (XEXP (x
, 0)) != MULT
)
7478 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
7480 /* A COMPARE of a MULT is slow on XScale; the muls instruction
7481 will stall until the multiplication is complete. */
7482 *total
= COSTS_N_INSNS (3);
7486 /* There is no point basing this on the tuning, since it is always the
7487 fast variant if it exists at all. */
7489 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7490 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7491 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7493 *total
= COSTS_N_INSNS (2);
7500 *total
= COSTS_N_INSNS (5);
7504 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
7506 /* If operand 1 is a constant we can more accurately
7507 calculate the cost of the multiply. The multiplier can
7508 retire 15 bits on the first cycle and a further 12 on the
7509 second. We do, of course, have to load the constant into
7510 a register first. */
7511 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
7512 /* There's a general overhead of one cycle. */
7514 unsigned HOST_WIDE_INT masked_const
;
7519 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
7521 masked_const
= i
& 0xffff8000;
7522 if (masked_const
!= 0)
7525 masked_const
= i
& 0xf8000000;
7526 if (masked_const
!= 0)
7529 *total
= COSTS_N_INSNS (cost
);
7535 *total
= COSTS_N_INSNS (3);
7539 /* Requires a lib call */
7540 *total
= COSTS_N_INSNS (20);
7544 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
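/* Editorial note (not part of the original source): the constant-multiply
   heuristic in arm_xscale_rtx_costs above assumes the XScale multiplier
   retires 15 bits in the first cycle and a further 12 in the second, so a
   multiplier such as 0x00001234 fits the first chunk (i & 0xffff8000 == 0),
   0x00123456 also needs the second chunk, and anything with bits set in
   0xf8000000 needs a third.  The helper below restates that bucketing; its
   name and the exact cycles added per bucket are illustrative assumptions,
   not taken verbatim from the original source.  */
#if 0
static int
xscale_mult_cycles_example (unsigned HOST_WIDE_INT i)
{
  int cycles = 1;			/* General overhead of one cycle.  */
  i &= (unsigned HOST_WIDE_INT) 0xffffffff;
  if ((i & 0xffff8000) != 0)
    cycles++;				/* Needs the second retirement step.  */
  if ((i & 0xf8000000) != 0)
    cycles++;				/* And a third for the top bits.  */
  return cycles;
}
#endif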
7549 /* RTX costs for 9e (and later) cores. */
7552 arm_9e_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer_code
,
7553 int *total
, bool speed
)
7555 enum machine_mode mode
= GET_MODE (x
);
7562 *total
= COSTS_N_INSNS (3);
7566 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
7574 /* There is no point basing this on the tuning, since it is always the
7575 fast variant if it exists at all. */
7577 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
7578 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
7579 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
7581 *total
= COSTS_N_INSNS (2);
7588 *total
= COSTS_N_INSNS (5);
7594 *total
= COSTS_N_INSNS (2);
7598 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7600 if (TARGET_HARD_FLOAT
7602 || (mode
== DFmode
&& !TARGET_VFP_SINGLE
)))
7604 *total
= COSTS_N_INSNS (1);
7609 *total
= COSTS_N_INSNS (20);
7613 return arm_rtx_costs_1 (x
, outer_code
, total
, speed
);
/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most preferred first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
        return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}
7666 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
7670 /* Some true dependencies can have a higher cost depending
7671 on precisely how certain input operands are used. */
7673 && REG_NOTE_KIND (link
) == 0
7674 && recog_memoized (insn
) >= 0
7675 && recog_memoized (dep
) >= 0)
7677 int shift_opnum
= get_attr_shift (insn
);
7678 enum attr_type attr_type
= get_attr_type (dep
);
7680 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
7681 operand for INSN. If we have a shifted input operand and the
7682 instruction we depend on is another ALU instruction, then we may
7683 have to account for an additional stall. */
7684 if (shift_opnum
!= 0
7685 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
7687 rtx shifted_operand
;
7690 /* Get the shifted operand. */
7691 extract_insn (insn
);
7692 shifted_operand
= recog_data
.operand
[shift_opnum
];
          /* Iterate over all the operands in DEP.  If we write an operand
             that overlaps with SHIFTED_OPERAND, then we have to increase
             the cost of this dependency.  */
7698 preprocess_constraints ();
7699 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
7701 /* We can ignore strict inputs. */
7702 if (recog_data
.operand_type
[opno
] == OP_IN
)
7705 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
7712 /* XXX This is not strictly true for the FPA. */
7713 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
7714 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
7717 /* Call insns don't incur a stall, even if they follow a load. */
7718 if (REG_NOTE_KIND (link
) == 0
7719 && GET_CODE (insn
) == CALL_INSN
)
7722 if ((i_pat
= single_set (insn
)) != NULL
7723 && GET_CODE (SET_SRC (i_pat
)) == MEM
7724 && (d_pat
= single_set (dep
)) != NULL
7725 && GET_CODE (SET_DEST (d_pat
)) == MEM
)
7727 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
      /* This is a load after a store; there is no conflict if the load reads
         from a cached area.  Assume that loads from the stack, and from the
         constant pool are cached, and that others will miss.  This is a
         hack.  */
7733 if ((GET_CODE (src_mem
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (src_mem
))
7734 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
7735 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
7736 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
static int fp_consts_inited = 0;

/* Only zero is valid for VFP.  Other values are also valid for FPA.  */
static const char * const strings_fp[8] =
{
  "0",   "1",   "2",   "3",
  "4",   "5",   "0.5", "10"
};

static REAL_VALUE_TYPE values_fp[8];

static void
init_fp_table (void)
{
  int i;
  REAL_VALUE_TYPE r;

  if (TARGET_VFP)
    fp_consts_inited = 1;
  else
    fp_consts_inited = 8;

  for (i = 0; i < fp_consts_inited; i++)
    {
      r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
      values_fp[i] = r;
    }
}
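/* Editorial note (not part of the original source): with the table above,
   only 0.0 is a legal FP immediate when only VFP is available
   (fp_consts_inited == 1), whereas the FPA unit also accepts 1, 2, 3, 4, 5,
   0.5 and 10.  The hypothetical helper below merely restates that count,
   for illustration.  */
#if 0
static int
arm_fp_immediate_count_example (void)
{
  return TARGET_VFP ? 1 : 8;	/* Mirrors init_fp_table above.  */
}
#endif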
/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  for (i = 0; i < fp_consts_inited; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
/* Return TRUE if rtx X is a valid immediate FPA constant.  */
int
neg_const_double_rtx_ok_for_fpa (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  r = REAL_VALUE_NEGATE (r);
  if (REAL_VALUE_MINUS_ZERO (r))
    return 0;

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return 1;

  return 0;
}
/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     valid = (-1)^s * n * 2^-r

   where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/
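/* Editorial note (not part of the original source): a worked example of the
   quarter-precision encoding above.  The value 1.0 is (-1)^0 * 16 * 2^-4,
   so s = 0, n = 16, r = 4, giving A = 0, BCD = (4 XOR 3) = 111 and
   EFGH = 16 - 16 = 0000, i.e. the 8-bit encoding 0x70 -- the same form
   computed by the return expression at the end of vfp3_const_double_index
   below.  The helper name in this sketch is hypothetical and for
   illustration only.  */
#if 0
static int
vfp3_quarter_precision_encode_example (int s, int n, int r)
{
  /* Valid inputs: s in {0, 1}, 16 <= n <= 31, 0 <= r <= 7.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif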
7832 /* Return an integer index for a VFPv3 immediate operand X suitable for the
7833 fconst[sd] instruction, or -1 if X isn't suitable. */
7835 vfp3_const_double_index (rtx x
)
7837 REAL_VALUE_TYPE r
, m
;
7839 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
7840 unsigned HOST_WIDE_INT mask
;
7841 HOST_WIDE_INT m1
, m2
;
7842 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
7844 if (!TARGET_VFP3
|| GET_CODE (x
) != CONST_DOUBLE
)
7847 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7849 /* We can't represent these things, so detect them first. */
7850 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
7853 /* Extract sign, exponent and mantissa. */
7854 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
7855 r
= REAL_VALUE_ABS (r
);
7856 exponent
= REAL_EXP (&r
);
7857 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7858 highest (sign) bit, with a fixed binary point at bit point_pos.
7859 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7860 bits for the mantissa, this may fail (low bits would be lost). */
7861 real_ldexp (&m
, &r
, point_pos
- exponent
);
7862 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
7866 /* If there are bits set in the low part of the mantissa, we can't
7867 represent this value. */
7871 /* Now make it so that mantissa contains the most-significant bits, and move
7872 the point_pos to indicate that the least-significant bits have been
7874 point_pos
-= HOST_BITS_PER_WIDE_INT
;
7877 /* We can permit four significant bits of mantissa only, plus a high bit
7878 which is always 1. */
7879 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
7880 if ((mantissa
& mask
) != 0)
7883 /* Now we know the mantissa is in range, chop off the unneeded bits. */
7884 mantissa
>>= point_pos
- 5;
7886 /* The mantissa may be zero. Disallow that case. (It's possible to load the
7887 floating-point immediate zero with Neon using an integer-zero load, but
7888 that case is handled elsewhere.) */
7892 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
7894 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
7895 normalized significands are in the range [1, 2). (Our mantissa is shifted
7896 left 4 places at this point relative to normalized IEEE754 values). GCC
7897 internally uses [0.5, 1) (see real.c), so the exponent returned from
7898 REAL_EXP must be altered. */
7899 exponent
= 5 - exponent
;
7901 if (exponent
< 0 || exponent
> 7)
7904 /* Sign, mantissa and exponent are now in the correct form to plug into the
7905 formula described in the comment above. */
7906 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}
/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions).  If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table,
   or -1 if the given value doesn't match any of the listed patterns.  */
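/* Editorial note (not part of the original source): a worked example of the
   table above.  A V4SImode constant whose four elements are all 0x000000ab
   splats (little-endian) into the repeating byte pattern shown below, which
   satisfies the variant-0 check in neon_valid_immediate (low byte equal,
   upper three bytes zero), so it can be emitted as a vmov.i32 immediate.
   The same value with the upper three bytes all 0xff would instead match
   variant 6 and be emitted through vmvn.i32.  Illustration only.  */
#if 0
static const unsigned char neon_variant0_bytes_example[16] =
  { 0xab, 0, 0, 0,  0xab, 0, 0, 0,  0xab, 0, 0, 0,  0xab, 0, 0, 0 };
#endif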
static int
neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
                      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }
7983 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
7984 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
7985 unsigned char bytes
[16];
7986 int immtype
= -1, matches
;
7987 unsigned int invmask
= inverse
? 0xff : 0;
7989 /* Vectors of float constants. */
7990 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
7992 rtx el0
= CONST_VECTOR_ELT (op
, 0);
7995 if (!vfp3_const_double_rtx (el0
))
7998 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
8000 for (i
= 1; i
< n_elts
; i
++)
8002 rtx elt
= CONST_VECTOR_ELT (op
, i
);
8005 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
8007 if (!REAL_VALUES_EQUAL (r0
, re
))
8012 *modconst
= CONST_VECTOR_ELT (op
, 0);
8020 /* Splat vector constant out into a byte vector. */
8021 for (i
= 0; i
< n_elts
; i
++)
8023 rtx el
= CONST_VECTOR_ELT (op
, i
);
8024 unsigned HOST_WIDE_INT elpart
;
8025 unsigned int part
, parts
;
8027 if (GET_CODE (el
) == CONST_INT
)
8029 elpart
= INTVAL (el
);
8032 else if (GET_CODE (el
) == CONST_DOUBLE
)
8034 elpart
= CONST_DOUBLE_LOW (el
);
8040 for (part
= 0; part
< parts
; part
++)
8043 for (byte
= 0; byte
< innersize
; byte
++)
8045 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8046 elpart
>>= BITS_PER_UNIT
;
8048 if (GET_CODE (el
) == CONST_DOUBLE
)
8049 elpart
= CONST_DOUBLE_HIGH (el
);
8054 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8058 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8059 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8061 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8062 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8064 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8065 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8067 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8068 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
8070 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
8072 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
8074 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8075 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8077 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8078 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8080 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8081 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8083 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8084 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
8086 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
8088 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
8090 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8091 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
8093 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8094 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
8096 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8097 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
8099 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8100 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
8102 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
8104 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8105 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
8113 *elementwidth
= elsize
;
8117 unsigned HOST_WIDE_INT imm
= 0;
8119 /* Un-invert bytes of recognized vector, if necessary. */
8121 for (i
= 0; i
< idx
; i
++)
8122 bytes
[i
] ^= invmask
;
8126 /* FIXME: Broken on 32-bit H_W_I hosts. */
8127 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8129 for (i
= 0; i
< 8; i
++)
8130 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8131 << (i
* BITS_PER_UNIT
);
8133 *modconst
= GEN_INT (imm
);
8137 unsigned HOST_WIDE_INT imm
= 0;
8139 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8140 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8142 *modconst
= GEN_INT (imm
);
8150 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
8151 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
8152 float elements), and a modified constant (whatever should be output for a
8153 VMOV) in *MODCONST. */
8156 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
8157 rtx
*modconst
, int *elementwidth
)
8161 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
8167 *modconst
= tmpconst
;
8170 *elementwidth
= tmpwidth
;
8175 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
8176 the immediate is valid, write a constant suitable for using as an operand
8177 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
8178 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
8181 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
8182 rtx
*modconst
, int *elementwidth
)
8186 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
8188 if (retval
< 0 || retval
> 5)
8192 *modconst
= tmpconst
;
8195 *elementwidth
= tmpwidth
;
/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */
8204 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
8205 int inverse
, int quad
)
8207 int width
, is_valid
;
8208 static char templ
[40];
8210 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
8212 gcc_assert (is_valid
!= 0);
8215 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
8217 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
8222 /* Output a sequence of pairwise operations to implement a reduction.
8223 NOTE: We do "too much work" here, because pairwise operations work on two
8224 registers-worth of operands in one go. Unfortunately we can't exploit those
8225 extra calculations to do the full operation in fewer steps, I don't think.
8226 Although all vector elements of the result but the first are ignored, we
8227 actually calculate the same result in each of the elements. An alternative
8228 such as initially loading a vector with zero to use as each of the second
8229 operands would use up an additional register and take an extra instruction,
8230 for no particular gain. */
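/* Editorial note (not part of the original source): for a vector of 2^k
   elements, the loop in neon_pairwise_reduce below runs with i = parts/2,
   parts/4, ..., 1, i.e. it emits k pairwise operations; a four-element sum
   reduction therefore needs two steps.  The hypothetical helper below only
   counts those steps, for illustration.  */
#if 0
static unsigned int
neon_pairwise_steps_example (unsigned int parts)
{
  unsigned int i, steps = 0;
  for (i = parts / 2; i >= 1; i /= 2)	/* Same loop shape as below.  */
    steps++;
  return steps;	/* 4 elements -> 2 steps, 8 -> 3, 16 -> 4.  */
}
#endif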
8233 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
8234 rtx (*reduc
) (rtx
, rtx
, rtx
))
8236 enum machine_mode inner
= GET_MODE_INNER (mode
);
8237 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
8240 for (i
= parts
/ 2; i
>= 1; i
/= 2)
8242 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
8243 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
8248 /* If VALS is a vector constant that can be loaded into a register
8249 using VDUP, generate instructions to do so and return an RTX to
8250 assign to the register. Otherwise return NULL_RTX. */
8253 neon_vdup_constant (rtx vals
)
8255 enum machine_mode mode
= GET_MODE (vals
);
8256 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8257 int n_elts
= GET_MODE_NUNITS (mode
);
8258 bool all_same
= true;
8262 if (GET_CODE (vals
) != CONST_VECTOR
|| GET_MODE_SIZE (inner_mode
) > 4)
8265 for (i
= 0; i
< n_elts
; ++i
)
8267 x
= XVECEXP (vals
, 0, i
);
8268 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
8283 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8284 return gen_rtx_UNSPEC (mode
, gen_rtvec (1, x
),
8288 /* Generate code to load VALS, which is a PARALLEL containing only
8289 constants (for vec_init) or CONST_VECTOR, efficiently into a
8290 register. Returns an RTX to copy into the register, or NULL_RTX
8291 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8294 neon_make_constant (rtx vals
)
8296 enum machine_mode mode
= GET_MODE (vals
);
8298 rtx const_vec
= NULL_RTX
;
8299 int n_elts
= GET_MODE_NUNITS (mode
);
8303 if (GET_CODE (vals
) == CONST_VECTOR
)
8305 else if (GET_CODE (vals
) == PARALLEL
)
8307 /* A CONST_VECTOR must contain only CONST_INTs and
8308 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8309 Only store valid constants in a CONST_VECTOR. */
8310 for (i
= 0; i
< n_elts
; ++i
)
8312 rtx x
= XVECEXP (vals
, 0, i
);
8313 if (GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST_DOUBLE
)
8316 if (n_const
== n_elts
)
8317 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8322 if (const_vec
!= NULL
8323 && neon_immediate_valid_for_move (const_vec
, mode
, NULL
, NULL
))
8324 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
8326 else if ((target
= neon_vdup_constant (vals
)) != NULL_RTX
)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
8331 else if (const_vec
!= NULL_RTX
)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
8343 /* Initialize vector TARGET to VALS. */
8346 neon_expand_vector_init (rtx target
, rtx vals
)
8348 enum machine_mode mode
= GET_MODE (target
);
8349 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8350 int n_elts
= GET_MODE_NUNITS (mode
);
8351 int n_var
= 0, one_var
= -1;
8352 bool all_same
= true;
8356 for (i
= 0; i
< n_elts
; ++i
)
8358 x
= XVECEXP (vals
, 0, i
);
8359 if (!CONSTANT_P (x
))
8360 ++n_var
, one_var
= i
;
8362 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8368 rtx constant
= neon_make_constant (vals
);
8369 if (constant
!= NULL_RTX
)
8371 emit_move_insn (target
, constant
);
8376 /* Splat a single non-constant element if we can. */
8377 if (all_same
&& GET_MODE_SIZE (inner_mode
) <= 4)
8379 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8380 emit_insn (gen_rtx_SET (VOIDmode
, target
,
8381 gen_rtx_UNSPEC (mode
, gen_rtvec (1, x
),
8386 /* One field is non-constant. Load constant then overwrite varying
8387 field. This is more efficient than using the stack. */
8390 rtx copy
= copy_rtx (vals
);
      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
8395 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, (one_var
+ 1) % n_elts
);
8396 neon_expand_vector_init (target
, copy
);
8398 /* Insert variable. */
8399 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8400 ops
= gen_rtvec (3, x
, target
, GEN_INT (one_var
));
8401 emit_insn (gen_rtx_SET (VOIDmode
, target
,
8402 gen_rtx_UNSPEC (mode
, ops
, UNSPEC_VSET_LANE
)));
8406 /* Construct the vector in memory one field at a time
8407 and load the whole vector. */
8408 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), 0);
8409 for (i
= 0; i
< n_elts
; i
++)
8410 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8411 i
* GET_MODE_SIZE (inner_mode
)),
8412 XVECEXP (vals
, 0, i
));
8413 emit_move_insn (target
, mem
);
8416 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
8417 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
8418 reported source locations are bogus. */
8421 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8426 gcc_assert (GET_CODE (operand
) == CONST_INT
);
8428 lane
= INTVAL (operand
);
8430 if (lane
< low
|| lane
>= high
)
8434 /* Bounds-check lanes. */
8437 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8439 bounds_check (operand
, low
, high
, "lane out of range");
8442 /* Bounds-check constants. */
8445 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
8447 bounds_check (operand
, low
, high
, "constant out of range");
8451 neon_element_bits (enum machine_mode mode
)
8454 return GET_MODE_BITSIZE (mode
);
8456 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
8460 /* Predicates for `match_operand' and `match_operator'. */
8462 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
8464 cirrus_memory_offset (rtx op
)
8466 /* Reject eliminable registers. */
8467 if (! (reload_in_progress
|| reload_completed
)
8468 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8469 || reg_mentioned_p (arg_pointer_rtx
, op
)
8470 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8471 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8472 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8473 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8476 if (GET_CODE (op
) == MEM
)
8482 /* Match: (mem (reg)). */
8483 if (GET_CODE (ind
) == REG
)
8489 if (GET_CODE (ind
) == PLUS
8490 && GET_CODE (XEXP (ind
, 0)) == REG
8491 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8492 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
)
/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */
8505 arm_coproc_mem_operand (rtx op
, bool wb
)
8509 /* Reject eliminable registers. */
8510 if (! (reload_in_progress
|| reload_completed
)
8511 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8512 || reg_mentioned_p (arg_pointer_rtx
, op
)
8513 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8514 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8515 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8516 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8519 /* Constants are converted into offsets from labels. */
8520 if (GET_CODE (op
) != MEM
)
8525 if (reload_completed
8526 && (GET_CODE (ind
) == LABEL_REF
8527 || (GET_CODE (ind
) == CONST
8528 && GET_CODE (XEXP (ind
, 0)) == PLUS
8529 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8530 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8533 /* Match: (mem (reg)). */
8534 if (GET_CODE (ind
) == REG
)
8535 return arm_address_register_rtx_p (ind
, 0);
  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
8538 acceptable in any case (subject to verification by
8539 arm_address_register_rtx_p). We need WB to be true to accept
8540 PRE_INC and POST_DEC. */
8541 if (GET_CODE (ind
) == POST_INC
8542 || GET_CODE (ind
) == PRE_DEC
8544 && (GET_CODE (ind
) == PRE_INC
8545 || GET_CODE (ind
) == POST_DEC
)))
8546 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8549 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
8550 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
8551 && GET_CODE (XEXP (ind
, 1)) == PLUS
8552 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
8553 ind
= XEXP (ind
, 1);
8558 if (GET_CODE (ind
) == PLUS
8559 && GET_CODE (XEXP (ind
, 0)) == REG
8560 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8561 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8562 && INTVAL (XEXP (ind
, 1)) > -1024
8563 && INTVAL (XEXP (ind
, 1)) < 1024
8564 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from.  TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
8577 neon_vector_mem_operand (rtx op
, int type
)
8581 /* Reject eliminable registers. */
8582 if (! (reload_in_progress
|| reload_completed
)
8583 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8584 || reg_mentioned_p (arg_pointer_rtx
, op
)
8585 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8586 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8587 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8588 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8591 /* Constants are converted into offsets from labels. */
8592 if (GET_CODE (op
) != MEM
)
8597 if (reload_completed
8598 && (GET_CODE (ind
) == LABEL_REF
8599 || (GET_CODE (ind
) == CONST
8600 && GET_CODE (XEXP (ind
, 0)) == PLUS
8601 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8602 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8605 /* Match: (mem (reg)). */
8606 if (GET_CODE (ind
) == REG
)
8607 return arm_address_register_rtx_p (ind
, 0);
8609 /* Allow post-increment with Neon registers. */
8610 if (type
!= 1 && (GET_CODE (ind
) == POST_INC
|| GET_CODE (ind
) == PRE_DEC
))
8611 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
8613 /* FIXME: vld1 allows register post-modify. */
8619 && GET_CODE (ind
) == PLUS
8620 && GET_CODE (XEXP (ind
, 0)) == REG
8621 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
8622 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
8623 && INTVAL (XEXP (ind
, 1)) > -1024
8624 && INTVAL (XEXP (ind
, 1)) < 1016
8625 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
8634 neon_struct_mem_operand (rtx op
)
8638 /* Reject eliminable registers. */
8639 if (! (reload_in_progress
|| reload_completed
)
8640 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
8641 || reg_mentioned_p (arg_pointer_rtx
, op
)
8642 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
8643 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
8644 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
8645 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
8648 /* Constants are converted into offsets from labels. */
8649 if (GET_CODE (op
) != MEM
)
8654 if (reload_completed
8655 && (GET_CODE (ind
) == LABEL_REF
8656 || (GET_CODE (ind
) == CONST
8657 && GET_CODE (XEXP (ind
, 0)) == PLUS
8658 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
8659 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
8662 /* Match: (mem (reg)). */
8663 if (GET_CODE (ind
) == REG
)
8664 return arm_address_register_rtx_p (ind
, 0);
8669 /* Return true if X is a register that will be eliminated later on. */
8671 arm_eliminable_register (rtx x
)
8673 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
8674 || REGNO (x
) == ARG_POINTER_REGNUM
8675 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
8676 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
/* Return GENERAL_REGS if a scratch register is required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */
8683 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
8687 if (!TARGET_NEON_FP16
)
8688 return GENERAL_REGS
;
8689 if (s_register_operand (x
, mode
) || neon_vector_mem_operand (x
, 2))
8691 return GENERAL_REGS
;
8695 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
8696 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8697 && neon_vector_mem_operand (x
, 0))
8700 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
8703 return GENERAL_REGS
;
/* Values which must be returned in the most-significant end of the return
   location.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
          && (AGGREGATE_TYPE_P (valtype)
              || TREE_CODE (valtype) == COMPLEX_TYPE));
}
/* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
   Used by the Cirrus Maverick code which has to work around
   a hardware bug triggered by such instructions.  */
8722 arm_memory_load_p (rtx insn
)
8724 rtx body
, lhs
, rhs
;;
8726 if (insn
== NULL_RTX
|| GET_CODE (insn
) != INSN
)
8729 body
= PATTERN (insn
);
8731 if (GET_CODE (body
) != SET
)
8734 lhs
= XEXP (body
, 0);
8735 rhs
= XEXP (body
, 1);
8737 lhs
= REG_OR_SUBREG_RTX (lhs
);
8739 /* If the destination is not a general purpose
8740 register we do not have to worry. */
8741 if (GET_CODE (lhs
) != REG
8742 || REGNO_REG_CLASS (REGNO (lhs
)) != GENERAL_REGS
)
8745 /* As well as loads from memory we also have to react
8746 to loads of invalid constants which will be turned
8747 into loads from the minipool. */
8748 return (GET_CODE (rhs
) == MEM
8749 || GET_CODE (rhs
) == SYMBOL_REF
8750 || note_invalid_constants (insn
, -1, false));
8753 /* Return TRUE if INSN is a Cirrus instruction. */
8755 arm_cirrus_insn_p (rtx insn
)
8757 enum attr_cirrus attr
;
8759 /* get_attr cannot accept USE or CLOBBER. */
8761 || GET_CODE (insn
) != INSN
8762 || GET_CODE (PATTERN (insn
)) == USE
8763 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
8766 attr
= get_attr_cirrus (insn
);
8768 return attr
!= CIRRUS_NOT
;
8771 /* Cirrus reorg for invalid instruction combinations. */
8773 cirrus_reorg (rtx first
)
8775 enum attr_cirrus attr
;
8776 rtx body
= PATTERN (first
);
8780 /* Any branch must be followed by 2 non Cirrus instructions. */
8781 if (GET_CODE (first
) == JUMP_INSN
&& GET_CODE (body
) != RETURN
)
8784 t
= next_nonnote_insn (first
);
8786 if (arm_cirrus_insn_p (t
))
8789 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
8793 emit_insn_after (gen_nop (), first
);
8798 /* (float (blah)) is in parallel with a clobber. */
8799 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
8800 body
= XVECEXP (body
, 0, 0);
8802 if (GET_CODE (body
) == SET
)
8804 rtx lhs
= XEXP (body
, 0), rhs
= XEXP (body
, 1);
8806 /* cfldrd, cfldr64, cfstrd, cfstr64 must
8807 be followed by a non Cirrus insn. */
8808 if (get_attr_cirrus (first
) == CIRRUS_DOUBLE
)
8810 if (arm_cirrus_insn_p (next_nonnote_insn (first
)))
8811 emit_insn_after (gen_nop (), first
);
8815 else if (arm_memory_load_p (first
))
8817 unsigned int arm_regno
;
      /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
         ldr/cfmv64hr combination where the Rd field is the same
         in both instructions must be split with a non Cirrus
         insn.  */
8828 /* Get Arm register number for ldr insn. */
8829 if (GET_CODE (lhs
) == REG
)
8830 arm_regno
= REGNO (lhs
);
8833 gcc_assert (GET_CODE (rhs
) == REG
);
8834 arm_regno
= REGNO (rhs
);
8838 first
= next_nonnote_insn (first
);
8840 if (! arm_cirrus_insn_p (first
))
8843 body
= PATTERN (first
);
8845 /* (float (blah)) is in parallel with a clobber. */
8846 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0))
8847 body
= XVECEXP (body
, 0, 0);
8849 if (GET_CODE (body
) == FLOAT
)
8850 body
= XEXP (body
, 0);
8852 if (get_attr_cirrus (first
) == CIRRUS_MOVE
8853 && GET_CODE (XEXP (body
, 1)) == REG
8854 && arm_regno
== REGNO (XEXP (body
, 1)))
8855 emit_insn_after (gen_nop (), first
);
8861 /* get_attr cannot accept USE or CLOBBER. */
8863 || GET_CODE (first
) != INSN
8864 || GET_CODE (PATTERN (first
)) == USE
8865 || GET_CODE (PATTERN (first
)) == CLOBBER
)
8868 attr
= get_attr_cirrus (first
);
8870 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
8871 must be followed by a non-coprocessor instruction. */
8872 if (attr
== CIRRUS_COMPARE
)
8876 t
= next_nonnote_insn (first
);
8878 if (arm_cirrus_insn_p (t
))
8881 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
8885 emit_insn_after (gen_nop (), first
);
/* Return TRUE if X references a SYMBOL_REF.  */

int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X references a LABEL_REF.  */

int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Return TRUE if X contains any TLS references.  */

int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    default:
      return 0;
    }
}

/* Must not copy any rtx that uses a pc-relative address.  */

static int
arm_note_pic_base (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_PIC_BASE)
    return 1;

  return 0;
}

static bool
arm_cannot_copy_insn_p (rtx insn)
{
  return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
}
  enum rtx_code code = GET_CODE (x);
/* Return 1 if memory locations are adjacent.  */

int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((GET_CODE (XEXP (a, 0)) == REG
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
      && (GET_CODE (XEXP (b, 0)) == REG
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
	{
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
	}
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
	{
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
	}
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}
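
/* As a hypothetical example: MEMs addressing [r4, #8] and [r4, #12] are
   adjacent (same base register, offsets differing by exactly 4), so the
   arith_adjacentmem and load/store-multiple patterns may combine them;
   [r4] and [r5, #4] are not, because the base registers differ.  */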
9078 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *base
,
9079 HOST_WIDE_INT
*load_offset
)
9081 int unsorted_regs
[4];
9082 HOST_WIDE_INT unsorted_offsets
[4];
9087 /* Can only handle 2, 3, or 4 insns at present,
9088 though could be easily extended if required. */
9089 gcc_assert (nops
>= 2 && nops
<= 4);
9091 memset (order
, 0, 4 * sizeof (int));
9093 /* Loop over the operands and check that the memory references are
9094 suitable (i.e. immediate offsets from the same base register). At
9095 the same time, extract the target register, and the memory
9097 for (i
= 0; i
< nops
; i
++)
9102 /* Convert a subreg of a mem into the mem itself. */
9103 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9104 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9106 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9108 /* Don't reorder volatile memory references; it doesn't seem worth
9109 looking for the case where the order is ok anyway. */
9110 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9113 offset
= const0_rtx
;
9115 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9116 || (GET_CODE (reg
) == SUBREG
9117 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9118 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9119 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9121 || (GET_CODE (reg
) == SUBREG
9122 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9123 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9128 base_reg
= REGNO (reg
);
9129 unsorted_regs
[0] = (GET_CODE (operands
[i
]) == REG
9130 ? REGNO (operands
[i
])
9131 : REGNO (SUBREG_REG (operands
[i
])));
9136 if (base_reg
!= (int) REGNO (reg
))
9137 /* Not addressed from the same base register. */
9140 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9141 ? REGNO (operands
[i
])
9142 : REGNO (SUBREG_REG (operands
[i
])));
9143 if (unsorted_regs
[i
] < unsorted_regs
[order
[0]])
9147 /* If it isn't an integer register, or if it overwrites the
9148 base register but isn't the last insn in the list, then
9149 we can't do this. */
9150 if (unsorted_regs
[i
] < 0 || unsorted_regs
[i
] > 14
9151 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
9154 unsorted_offsets
[i
] = INTVAL (offset
);
9157 /* Not a suitable memory address. */
9161 /* All the useful information has now been extracted from the
9162 operands into unsorted_regs and unsorted_offsets; additionally,
9163 order[0] has been set to the lowest numbered register in the
9164 list. Sort the registers into order, and check that the memory
9165 offsets are ascending and adjacent. */
9167 for (i
= 1; i
< nops
; i
++)
9171 order
[i
] = order
[i
- 1];
9172 for (j
= 0; j
< nops
; j
++)
9173 if (unsorted_regs
[j
] > unsorted_regs
[order
[i
- 1]]
9174 && (order
[i
] == order
[i
- 1]
9175 || unsorted_regs
[j
] < unsorted_regs
[order
[i
]]))
9178 /* Have we found a suitable register? if not, one must be used more
9180 if (order
[i
] == order
[i
- 1])
9183 /* Is the memory address adjacent and ascending? */
9184 if (unsorted_offsets
[order
[i
]] != unsorted_offsets
[order
[i
- 1]] + 4)
9192 for (i
= 0; i
< nops
; i
++)
9193 regs
[i
] = unsorted_regs
[order
[i
]];
9195 *load_offset
= unsorted_offsets
[order
[0]];
9198 if (unsorted_offsets
[order
[0]] == 0)
9199 return 1; /* ldmia */
9201 if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
9202 return 2; /* ldmib */
9204 if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
9205 return 3; /* ldmda */
9207 if (unsorted_offsets
[order
[nops
- 1]] == -4)
9208 return 4; /* ldmdb */
9210 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9211 if the offset isn't small enough. The reason 2 ldrs are faster
9212 is because these ARMs are able to do more than one cache access
9213 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9214 whilst the ARM8 has a double bandwidth cache. This means that
9215 these cores can do both an instruction fetch and a data fetch in
9216 a single cycle, so the trick of calculating the address into a
9217 scratch register (one of the result regs) and then doing a load
9218 multiple actually becomes slower (and no smaller in code size).
9219 That is the transformation
9221 ldr rd1, [rbase + offset]
9222 ldr rd2, [rbase + offset + 4]
9226 add rd1, rbase, offset
9227 ldmia rd1, {rd1, rd2}
9229 produces worse code -- '3 cycles + any stalls on rd2' instead of
9230 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9231 access per cycle, the first sequence could never complete in less
9232 than 6 cycles, whereas the ldm sequence would only take 5 and
9233 would make better use of sequential accesses if not hitting the
9236 We cheat here and test 'arm_ld_sched' which we currently know to
9237 only be true for the ARM8, ARM9 and StrongARM. If this ever
9238 changes, then the test below needs to be reworked. */
  if (nops == 2 && arm_ld_sched)
    return 0;

  /* Can't do it without setting up the offset, only do this if it takes
     no more than one insn.  */
  return (const_ok_for_arm (unsorted_offsets[order[0]])
	  || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
}
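
/* To summarise the returns above: 1 selects ldmia, 2 ldmib, 3 ldmda,
   4 ldmdb, 5 means the base offset must first be materialised with a
   single add/sub, and 0 means no load-multiple can be used.
   emit_ldm_seq below relies on this encoding.  */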
const char *
emit_ldm_seq (rtx *operands, int nops)
{
  int regs[4];
  int base_reg;
  HOST_WIDE_INT offset;
  char buf[100];
  int i;

  switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
    {
    case 1:
      strcpy (buf, "ldm%(ia%)\t");
      break;

    case 2:
      strcpy (buf, "ldm%(ib%)\t");
      break;

    case 3:
      strcpy (buf, "ldm%(da%)\t");
      break;

    case 4:
      strcpy (buf, "ldm%(db%)\t");
      break;

    case 5:
      if (offset >= 0)
	sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
		 (long) offset);
      else
	sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
		 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
		 (long) -offset);
      output_asm_insn (buf, operands);
      base_reg = regs[0];
      strcpy (buf, "ldm%(ia%)\t");
      break;

    default:
      gcc_unreachable ();
    }

  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);

  for (i = 1; i < nops; i++)
    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
	     reg_names[regs[i]]);

  strcat (buf, "}\t%@ phole ldm");

  output_asm_insn (buf, operands);
  return "";
}
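
/* For instance, three registers loaded from consecutive words based at r3
   come out as "ldmia r3, {r0, r1, r2} @ phole ldm"; in the offset case
   (code 5) an add or sub into regs[0] is emitted first and that register
   is then used as the ldmia base.  */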
9307 store_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *base
,
9308 HOST_WIDE_INT
* load_offset
)
9310 int unsorted_regs
[4];
9311 HOST_WIDE_INT unsorted_offsets
[4];
9316 /* Can only handle 2, 3, or 4 insns at present, though could be easily
9317 extended if required. */
9318 gcc_assert (nops
>= 2 && nops
<= 4);
9320 memset (order
, 0, 4 * sizeof (int));
9322 /* Loop over the operands and check that the memory references are
9323 suitable (i.e. immediate offsets from the same base register). At
9324 the same time, extract the target register, and the memory
9326 for (i
= 0; i
< nops
; i
++)
9331 /* Convert a subreg of a mem into the mem itself. */
9332 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
9333 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
9335 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
9337 /* Don't reorder volatile memory references; it doesn't seem worth
9338 looking for the case where the order is ok anyway. */
9339 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
9342 offset
= const0_rtx
;
9344 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
9345 || (GET_CODE (reg
) == SUBREG
9346 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9347 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
9348 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
9350 || (GET_CODE (reg
) == SUBREG
9351 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
9352 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
9357 base_reg
= REGNO (reg
);
9358 unsorted_regs
[0] = (GET_CODE (operands
[i
]) == REG
9359 ? REGNO (operands
[i
])
9360 : REGNO (SUBREG_REG (operands
[i
])));
9365 if (base_reg
!= (int) REGNO (reg
))
9366 /* Not addressed from the same base register. */
9369 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
9370 ? REGNO (operands
[i
])
9371 : REGNO (SUBREG_REG (operands
[i
])));
9372 if (unsorted_regs
[i
] < unsorted_regs
[order
[0]])
9376 /* If it isn't an integer register, then we can't do this. */
9377 if (unsorted_regs
[i
] < 0 || unsorted_regs
[i
] > 14)
9380 unsorted_offsets
[i
] = INTVAL (offset
);
9383 /* Not a suitable memory address. */
9387 /* All the useful information has now been extracted from the
9388 operands into unsorted_regs and unsorted_offsets; additionally,
9389 order[0] has been set to the lowest numbered register in the
9390 list. Sort the registers into order, and check that the memory
9391 offsets are ascending and adjacent. */
9393 for (i
= 1; i
< nops
; i
++)
9397 order
[i
] = order
[i
- 1];
9398 for (j
= 0; j
< nops
; j
++)
9399 if (unsorted_regs
[j
] > unsorted_regs
[order
[i
- 1]]
9400 && (order
[i
] == order
[i
- 1]
9401 || unsorted_regs
[j
] < unsorted_regs
[order
[i
]]))
9404 /* Have we found a suitable register? if not, one must be used more
9406 if (order
[i
] == order
[i
- 1])
9409 /* Is the memory address adjacent and ascending? */
9410 if (unsorted_offsets
[order
[i
]] != unsorted_offsets
[order
[i
- 1]] + 4)
9418 for (i
= 0; i
< nops
; i
++)
9419 regs
[i
] = unsorted_regs
[order
[i
]];
9421 *load_offset
= unsorted_offsets
[order
[0]];
9424 if (unsorted_offsets
[order
[0]] == 0)
9425 return 1; /* stmia */
9427 if (unsorted_offsets
[order
[0]] == 4)
9428 return 2; /* stmib */
9430 if (unsorted_offsets
[order
[nops
- 1]] == 0)
9431 return 3; /* stmda */
9433 if (unsorted_offsets
[order
[nops
- 1]] == -4)
9434 return 4; /* stmdb */
  return 0;
}

const char *
emit_stm_seq (rtx *operands, int nops)
{
  int regs[4];
  int base_reg;
  HOST_WIDE_INT offset;
  char buf[100];
  int i;

  switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
    {
    case 1:
      strcpy (buf, "stm%(ia%)\t");
      break;

    case 2:
      strcpy (buf, "stm%(ib%)\t");
      break;

    case 3:
      strcpy (buf, "stm%(da%)\t");
      break;

    case 4:
      strcpy (buf, "stm%(db%)\t");
      break;

    default:
      gcc_unreachable ();
    }

  sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
	   reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);

  for (i = 1; i < nops; i++)
    sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
	     reg_names[regs[i]]);

  strcat (buf, "}\t%@ phole stm");

  output_asm_insn (buf, operands);
  return "";
}
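
/* Analogously, this emits e.g. "stmia r3, {r0, r1, r2}" for three registers
   stored to consecutive words based at r3; the trailing "%@ phole stm"
   expands to an assembler comment marking the insn as a peephole-generated
   store-multiple.  */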
9483 /* Routines for use in generating RTL. */
9486 arm_gen_load_multiple (int base_regno
, int count
, rtx from
, int up
,
9487 int write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
9489 HOST_WIDE_INT offset
= *offsetp
;
9492 int sign
= up
? 1 : -1;
9495 /* XScale has load-store double instructions, but they have stricter
9496 alignment requirements than load-store multiple, so we cannot
9499 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9500 the pipeline until completion.
9508 An ldr instruction takes 1-3 cycles, but does not block the
9517 Best case ldr will always win. However, the more ldr instructions
9518 we issue, the less likely we are to be able to schedule them well.
9519 Using ldr instructions also increases code size.
9521 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9522 for counts of 3 or 4 regs. */
9523 if (arm_tune_xscale
&& count
<= 2 && ! optimize_size
)
9529 for (i
= 0; i
< count
; i
++)
9531 addr
= plus_constant (from
, i
* 4 * sign
);
9532 mem
= adjust_automodify_address (basemem
, SImode
, addr
, offset
);
9533 emit_move_insn (gen_rtx_REG (SImode
, base_regno
+ i
), mem
);
9539 emit_move_insn (from
, plus_constant (from
, count
* 4 * sign
));
9549 result
= gen_rtx_PARALLEL (VOIDmode
,
9550 rtvec_alloc (count
+ (write_back
? 1 : 0)));
9553 XVECEXP (result
, 0, 0)
9554 = gen_rtx_SET (VOIDmode
, from
, plus_constant (from
, count
* 4 * sign
));
9559 for (j
= 0; i
< count
; i
++, j
++)
9561 addr
= plus_constant (from
, j
* 4 * sign
);
9562 mem
= adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
9563 XVECEXP (result
, 0, i
)
9564 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, base_regno
+ j
), mem
);
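
/* The RTL built above is a PARALLEL of SETs -- one per loaded register,
   plus a leading SET of the base register when write-back was requested --
   which is the shape the load-multiple patterns in the machine description
   expect.  */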
9575 arm_gen_store_multiple (int base_regno
, int count
, rtx to
, int up
,
9576 int write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
9578 HOST_WIDE_INT offset
= *offsetp
;
9581 int sign
= up
? 1 : -1;
9584 /* See arm_gen_load_multiple for discussion of
9585 the pros/cons of ldm/stm usage for XScale. */
9586 if (arm_tune_xscale
&& count
<= 2 && ! optimize_size
)
9592 for (i
= 0; i
< count
; i
++)
9594 addr
= plus_constant (to
, i
* 4 * sign
);
9595 mem
= adjust_automodify_address (basemem
, SImode
, addr
, offset
);
9596 emit_move_insn (mem
, gen_rtx_REG (SImode
, base_regno
+ i
));
9602 emit_move_insn (to
, plus_constant (to
, count
* 4 * sign
));
9612 result
= gen_rtx_PARALLEL (VOIDmode
,
9613 rtvec_alloc (count
+ (write_back
? 1 : 0)));
9616 XVECEXP (result
, 0, 0)
9617 = gen_rtx_SET (VOIDmode
, to
,
9618 plus_constant (to
, count
* 4 * sign
));
9623 for (j
= 0; i
< count
; i
++, j
++)
9625 addr
= plus_constant (to
, j
* 4 * sign
);
9626 mem
= adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
9627 XVECEXP (result
, 0, i
)
9628 = gen_rtx_SET (VOIDmode
, mem
, gen_rtx_REG (SImode
, base_regno
+ j
));
9639 arm_gen_movmemqi (rtx
*operands
)
9641 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
9642 HOST_WIDE_INT srcoffset
, dstoffset
;
9644 rtx src
, dst
, srcbase
, dstbase
;
9645 rtx part_bytes_reg
= NULL
;
9648 if (GET_CODE (operands
[2]) != CONST_INT
9649 || GET_CODE (operands
[3]) != CONST_INT
9650 || INTVAL (operands
[2]) > 64
9651 || INTVAL (operands
[3]) & 3)
9654 dstbase
= operands
[0];
9655 srcbase
= operands
[1];
9657 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
9658 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
9660 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
9661 out_words_to_go
= INTVAL (operands
[2]) / 4;
9662 last_bytes
= INTVAL (operands
[2]) & 3;
9663 dstoffset
= srcoffset
= 0;
9665 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
9666 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
9668 for (i
= 0; in_words_to_go
>= 2; i
+=4)
9670 if (in_words_to_go
> 4)
9671 emit_insn (arm_gen_load_multiple (0, 4, src
, TRUE
, TRUE
,
9672 srcbase
, &srcoffset
));
9674 emit_insn (arm_gen_load_multiple (0, in_words_to_go
, src
, TRUE
,
9675 FALSE
, srcbase
, &srcoffset
));
9677 if (out_words_to_go
)
9679 if (out_words_to_go
> 4)
9680 emit_insn (arm_gen_store_multiple (0, 4, dst
, TRUE
, TRUE
,
9681 dstbase
, &dstoffset
));
9682 else if (out_words_to_go
!= 1)
9683 emit_insn (arm_gen_store_multiple (0, out_words_to_go
,
9687 dstbase
, &dstoffset
));
9690 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
9691 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
9692 if (last_bytes
!= 0)
9694 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
9700 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
9701 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
9704 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
9705 if (out_words_to_go
)
9709 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
9710 sreg
= copy_to_reg (mem
);
9712 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
9713 emit_move_insn (mem
, sreg
);
9716 gcc_assert (!in_words_to_go
); /* Sanity check */
9721 gcc_assert (in_words_to_go
> 0);
9723 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
9724 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
9727 gcc_assert (!last_bytes
|| part_bytes_reg
);
9729 if (BYTES_BIG_ENDIAN
&& last_bytes
)
9731 rtx tmp
= gen_reg_rtx (SImode
);
9733 /* The bytes we want are in the top end of the word. */
9734 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
9735 GEN_INT (8 * (4 - last_bytes
))));
9736 part_bytes_reg
= tmp
;
9740 mem
= adjust_automodify_address (dstbase
, QImode
,
9741 plus_constant (dst
, last_bytes
- 1),
9742 dstoffset
+ last_bytes
- 1);
9743 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
9747 tmp
= gen_reg_rtx (SImode
);
9748 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
9749 part_bytes_reg
= tmp
;
9758 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
9759 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
9763 rtx tmp
= gen_reg_rtx (SImode
);
9764 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
9765 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
9766 part_bytes_reg
= tmp
;
9773 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
9774 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
9781 /* Select a dominance comparison mode if possible for a test of the general
9782 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
9783 COND_OR == DOM_CC_X_AND_Y => (X && Y)
9784 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
9785 COND_OR == DOM_CC_X_OR_Y => (X || Y)
9786 In all cases OP will be either EQ or NE, but we don't need to know which
9787 here. If we are unable to support a dominance comparison we return
9788 CC mode. This will then fail to match for the RTL expressions that
9789 generate this call. */
9791 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
9793 enum rtx_code cond1
, cond2
;
9796 /* Currently we will probably get the wrong result if the individual
9797 comparisons are not simple. This also ensures that it is safe to
9798 reverse a comparison if necessary. */
9799 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
9801 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
9805 /* The if_then_else variant of this tests the second condition if the
9806 first passes, but is true if the first fails. Reverse the first
9807 condition to get a true "inclusive-or" expression. */
9808 if (cond_or
== DOM_CC_NX_OR_Y
)
9809 cond1
= reverse_condition (cond1
);
9811 /* If the comparisons are not equal, and one doesn't dominate the other,
9812 then we can't do this. */
9814 && !comparison_dominates_p (cond1
, cond2
)
9815 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
9820 enum rtx_code temp
= cond1
;
9828 if (cond_or
== DOM_CC_X_AND_Y
)
9833 case EQ
: return CC_DEQmode
;
9834 case LE
: return CC_DLEmode
;
9835 case LEU
: return CC_DLEUmode
;
9836 case GE
: return CC_DGEmode
;
9837 case GEU
: return CC_DGEUmode
;
9838 default: gcc_unreachable ();
9842 if (cond_or
== DOM_CC_X_AND_Y
)
9858 if (cond_or
== DOM_CC_X_AND_Y
)
9874 if (cond_or
== DOM_CC_X_AND_Y
)
9890 if (cond_or
== DOM_CC_X_AND_Y
)
9905 /* The remaining cases only occur when both comparisons are the
9908 gcc_assert (cond1
== cond2
);
9912 gcc_assert (cond1
== cond2
);
9916 gcc_assert (cond1
== cond2
);
9920 gcc_assert (cond1
== cond2
);
9924 gcc_assert (cond1
== cond2
);
9933 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
9935 /* All floating point compares return CCFP if it is an equality
9936 comparison, and CCFPE otherwise. */
9937 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
9957 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
9966 /* A compare with a shifted operand. Because of canonicalization, the
9967 comparison will have to be swapped when we emit the assembler. */
9968 if (GET_MODE (y
) == SImode
9969 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
9970 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
9971 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
9972 || GET_CODE (x
) == ROTATERT
))
9975 /* This operation is performed swapped, but since we only rely on the Z
9976 flag we don't need an additional mode. */
9977 if (GET_MODE (y
) == SImode
9978 && (REG_P (y
) || (GET_CODE (y
) == SUBREG
))
9979 && GET_CODE (x
) == NEG
9980 && (op
== EQ
|| op
== NE
))
9983 /* This is a special case that is used by combine to allow a
9984 comparison of a shifted byte load to be split into a zero-extend
9985 followed by a comparison of the shifted integer (only valid for
9986 equalities and unsigned inequalities). */
9987 if (GET_MODE (x
) == SImode
9988 && GET_CODE (x
) == ASHIFT
9989 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
9990 && GET_CODE (XEXP (x
, 0)) == SUBREG
9991 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
9992 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
9993 && (op
== EQ
|| op
== NE
9994 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
9995 && GET_CODE (y
) == CONST_INT
)
9998 /* A construct for a conditional compare, if the false arm contains
9999 0, then both conditions must be true, otherwise either condition
10000 must be true. Not all conditions are possible, so CCmode is
10001 returned if it can't be done. */
10002 if (GET_CODE (x
) == IF_THEN_ELSE
10003 && (XEXP (x
, 2) == const0_rtx
10004 || XEXP (x
, 2) == const1_rtx
)
10005 && COMPARISON_P (XEXP (x
, 0))
10006 && COMPARISON_P (XEXP (x
, 1)))
10007 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10008 INTVAL (XEXP (x
, 2)));
10010 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10011 if (GET_CODE (x
) == AND
10012 && COMPARISON_P (XEXP (x
, 0))
10013 && COMPARISON_P (XEXP (x
, 1)))
10014 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10017 if (GET_CODE (x
) == IOR
10018 && COMPARISON_P (XEXP (x
, 0))
10019 && COMPARISON_P (XEXP (x
, 1)))
10020 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
10023 /* An operation (on Thumb) where we want to test for a single bit.
10024 This is done by shifting that bit up into the top bit of a
10025 scratch register; we can then branch on the sign bit. */
10027 && GET_MODE (x
) == SImode
10028 && (op
== EQ
|| op
== NE
)
10029 && GET_CODE (x
) == ZERO_EXTRACT
10030 && XEXP (x
, 1) == const1_rtx
)
10033 /* An operation that sets the condition codes as a side-effect, the
10034 V flag is not set correctly, so we can only use comparisons where
10035 this doesn't matter. (For LT and GE we can use "mi" and "pl"
10037 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
10038 if (GET_MODE (x
) == SImode
10040 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
10041 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
10042 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
10043 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
10044 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
10045 || GET_CODE (x
) == LSHIFTRT
10046 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
10047 || GET_CODE (x
) == ROTATERT
10048 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
10049 return CC_NOOVmode
;
10051 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
10054 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
10055 && GET_CODE (x
) == PLUS
10056 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  FP means this is a
   floating point compare: I don't think that it is needed on the arm.  */

rtx
arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

  return cc_reg;
}

/* Generate a sequence of insns that will generate the correct return
   address mask depending on the physical architecture that the program
   is running on.  */

rtx
arm_gen_return_addr_mask (void)
{
  rtx reg = gen_reg_rtx (Pmode);

  emit_insn (gen_return_addr_mask (reg));

  return reg;
}
10089 arm_reload_in_hi (rtx
*operands
)
10091 rtx ref
= operands
[1];
10093 HOST_WIDE_INT offset
= 0;
10095 if (GET_CODE (ref
) == SUBREG
)
10097 offset
= SUBREG_BYTE (ref
);
10098 ref
= SUBREG_REG (ref
);
10101 if (GET_CODE (ref
) == REG
)
10103 /* We have a pseudo which has been spilt onto the stack; there
10104 are two cases here: the first where there is a simple
10105 stack-slot replacement and a second where the stack-slot is
10106 out of range, or is used as a subreg. */
10107 if (reg_equiv_mem
[REGNO (ref
)])
10109 ref
= reg_equiv_mem
[REGNO (ref
)];
10110 base
= find_replacement (&XEXP (ref
, 0));
10113 /* The slot is out of range, or was dressed up in a SUBREG. */
10114 base
= reg_equiv_address
[REGNO (ref
)];
10117 base
= find_replacement (&XEXP (ref
, 0));
10119 /* Handle the case where the address is too complex to be offset by 1. */
10120 if (GET_CODE (base
) == MINUS
10121 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10123 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10125 emit_set_insn (base_plus
, base
);
10128 else if (GET_CODE (base
) == PLUS
)
10130 /* The addend must be CONST_INT, or we would have dealt with it above. */
10131 HOST_WIDE_INT hi
, lo
;
10133 offset
+= INTVAL (XEXP (base
, 1));
10134 base
= XEXP (base
, 0);
10136 /* Rework the address into a legal sequence of insns. */
10137 /* Valid range for lo is -4095 -> 4095 */
10140 : -((-offset
) & 0xfff));
10142 /* Corner case, if lo is the max offset then we would be out of range
10143 once we have added the additional 1 below, so bump the msb into the
10144 pre-loading insn(s). */
10148 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10149 ^ (HOST_WIDE_INT
) 0x80000000)
10150 - (HOST_WIDE_INT
) 0x80000000);
10152 gcc_assert (hi
+ lo
== offset
);
10156 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10158 /* Get the base address; addsi3 knows how to handle constants
10159 that require more than one insn. */
10160 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10166 /* Operands[2] may overlap operands[0] (though it won't overlap
10167 operands[1]), that's why we asked for a DImode reg -- so we can
10168 use the bit that does not overlap. */
10169 if (REGNO (operands
[2]) == REGNO (operands
[0]))
10170 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10172 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10174 emit_insn (gen_zero_extendqisi2 (scratch
,
10175 gen_rtx_MEM (QImode
,
10176 plus_constant (base
,
10178 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10179 gen_rtx_MEM (QImode
,
10180 plus_constant (base
,
10182 if (!BYTES_BIG_ENDIAN
)
10183 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10184 gen_rtx_IOR (SImode
,
10187 gen_rtx_SUBREG (SImode
, operands
[0], 0),
10191 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
10192 gen_rtx_IOR (SImode
,
10193 gen_rtx_ASHIFT (SImode
, scratch
,
10195 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
10198 /* Handle storing a half-word to memory during reload by synthesizing as two
10199 byte stores. Take care not to clobber the input values until after we
10200 have moved them somewhere safe. This code assumes that if the DImode
10201 scratch in operands[2] overlaps either the input value or output address
10202 in some way, then that value must die in this insn (we absolutely need
10203 two scratch registers for some corner cases). */
10205 arm_reload_out_hi (rtx
*operands
)
10207 rtx ref
= operands
[0];
10208 rtx outval
= operands
[1];
10210 HOST_WIDE_INT offset
= 0;
10212 if (GET_CODE (ref
) == SUBREG
)
10214 offset
= SUBREG_BYTE (ref
);
10215 ref
= SUBREG_REG (ref
);
10218 if (GET_CODE (ref
) == REG
)
10220 /* We have a pseudo which has been spilt onto the stack; there
10221 are two cases here: the first where there is a simple
10222 stack-slot replacement and a second where the stack-slot is
10223 out of range, or is used as a subreg. */
10224 if (reg_equiv_mem
[REGNO (ref
)])
10226 ref
= reg_equiv_mem
[REGNO (ref
)];
10227 base
= find_replacement (&XEXP (ref
, 0));
10230 /* The slot is out of range, or was dressed up in a SUBREG. */
10231 base
= reg_equiv_address
[REGNO (ref
)];
10234 base
= find_replacement (&XEXP (ref
, 0));
10236 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
10238 /* Handle the case where the address is too complex to be offset by 1. */
10239 if (GET_CODE (base
) == MINUS
10240 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
10242 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10244 /* Be careful not to destroy OUTVAL. */
10245 if (reg_overlap_mentioned_p (base_plus
, outval
))
10247 /* Updating base_plus might destroy outval, see if we can
10248 swap the scratch and base_plus. */
10249 if (!reg_overlap_mentioned_p (scratch
, outval
))
10252 scratch
= base_plus
;
10257 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10259 /* Be conservative and copy OUTVAL into the scratch now,
10260 this should only be necessary if outval is a subreg
10261 of something larger than a word. */
10262 /* XXX Might this clobber base? I can't see how it can,
10263 since scratch is known to overlap with OUTVAL, and
10264 must be wider than a word. */
10265 emit_insn (gen_movhi (scratch_hi
, outval
));
10266 outval
= scratch_hi
;
10270 emit_set_insn (base_plus
, base
);
10273 else if (GET_CODE (base
) == PLUS
)
10275 /* The addend must be CONST_INT, or we would have dealt with it above. */
10276 HOST_WIDE_INT hi
, lo
;
10278 offset
+= INTVAL (XEXP (base
, 1));
10279 base
= XEXP (base
, 0);
10281 /* Rework the address into a legal sequence of insns. */
10282 /* Valid range for lo is -4095 -> 4095 */
10285 : -((-offset
) & 0xfff));
10287 /* Corner case, if lo is the max offset then we would be out of range
10288 once we have added the additional 1 below, so bump the msb into the
10289 pre-loading insn(s). */
10293 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
10294 ^ (HOST_WIDE_INT
) 0x80000000)
10295 - (HOST_WIDE_INT
) 0x80000000);
10297 gcc_assert (hi
+ lo
== offset
);
10301 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
10303 /* Be careful not to destroy OUTVAL. */
10304 if (reg_overlap_mentioned_p (base_plus
, outval
))
10306 /* Updating base_plus might destroy outval, see if we
10307 can swap the scratch and base_plus. */
10308 if (!reg_overlap_mentioned_p (scratch
, outval
))
10311 scratch
= base_plus
;
10316 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
10318 /* Be conservative and copy outval into scratch now,
10319 this should only be necessary if outval is a
10320 subreg of something larger than a word. */
10321 /* XXX Might this clobber base? I can't see how it
10322 can, since scratch is known to overlap with
10324 emit_insn (gen_movhi (scratch_hi
, outval
));
10325 outval
= scratch_hi
;
10329 /* Get the base address; addsi3 knows how to handle constants
10330 that require more than one insn. */
10331 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
10337 if (BYTES_BIG_ENDIAN
)
10339 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
10340 plus_constant (base
, offset
+ 1)),
10341 gen_lowpart (QImode
, outval
)));
10342 emit_insn (gen_lshrsi3 (scratch
,
10343 gen_rtx_SUBREG (SImode
, outval
, 0),
10345 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
10346 gen_lowpart (QImode
, scratch
)));
10350 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
10351 gen_lowpart (QImode
, outval
)));
10352 emit_insn (gen_lshrsi3 (scratch
,
10353 gen_rtx_SUBREG (SImode
, outval
, 0),
10355 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
10356 plus_constant (base
, offset
+ 1)),
10357 gen_lowpart (QImode
, scratch
)));
/* Return true if a type must be passed in memory.  For AAPCS, small aggregates
   (padded to the size of a word) should be passed in a register.  */

static bool
arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
  if (TARGET_AAPCS_BASED)
    return must_pass_in_stack_var_size (mode, type);
  else
    return must_pass_in_stack_var_size_or_pad (mode, type);
}

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte has useful data.
   For legacy APCS ABIs we use the default.  For AAPCS based ABIs small
   aggregate types are placed in the lowest memory address.  */

bool
arm_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  if (!TARGET_AAPCS_BASED)
    return DEFAULT_FUNCTION_ARG_PADDING (mode, type) == upward;

  if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
    return false;

  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
   byte of the register has useful data, and return the opposite if the
   most significant byte does.
   For AAPCS, small aggregates and small complex types are always padded
   upwards.  */

bool
arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
		    tree type, int first ATTRIBUTE_UNUSED)
{
  if (TARGET_AAPCS_BASED
      && BYTES_BIG_ENDIAN
      && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
      && int_size_in_bytes (type) <= 4)
    return true;

  /* Otherwise, use default padding.  */
  return !BYTES_BIG_ENDIAN;
}
10415 /* Print a symbolic form of X to the debug file, F. */
10417 arm_print_value (FILE *f
, rtx x
)
10419 switch (GET_CODE (x
))
10422 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
10426 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
10434 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
10436 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
10437 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
10445 fprintf (f
, "\"%s\"", XSTR (x
, 0));
10449 fprintf (f
, "`%s'", XSTR (x
, 0));
10453 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
10457 arm_print_value (f
, XEXP (x
, 0));
10461 arm_print_value (f
, XEXP (x
, 0));
10463 arm_print_value (f
, XEXP (x
, 1));
10471 fprintf (f
, "????");
/* Routines for manipulation of the constant pool.  */

/* Arm instructions cannot load a large constant directly into a
   register; they have to come from a pc relative load.  The constant
   must therefore be placed in the addressable range of the pc
   relative load.  Depending on the precise pc relative load
   instruction the range is somewhere between 256 bytes and 4k.  This
   means that we often have to dump a constant inside a function, and
   generate code to branch around it.

   It is important to minimize this, since the branches will slow
   things down and make the code larger.

   Normally we can hide the table after an existing unconditional
   branch so that there is no interruption of the flow, but in the
   worst case the code looks like this:

	ldr	rn, L1
	...
	b	L2
	align
	L1:	.long value
	L2:
	...

	ldr	rn, L3
	...
	b	L4
	align
	L3:	.long value
	L4:
	...

   We fix this by performing a scan after scheduling, which notices
   which instructions need to have their operands fetched from the
   constant table and builds the table.

   The algorithm starts by building a table of all the constants that
   need fixing up and all the natural barriers in the function (places
   where a constant table can be dropped without breaking the flow).
   For each fixup we note how far the pc-relative replacement will be
   able to reach and the offset of the instruction into the function.

   Having built the table we then group the fixes together to form
   tables that are as large as possible (subject to addressing
   constraints) and emit each table of constants after the last
   barrier that is within range of all the instructions in the group.
   If a group does not contain a barrier, then we forcibly create one
   by inserting a jump instruction into the flow.  Once the table has
   been inserted, the insns are then modified to reference the
   relevant entry in the pool.

   Possible enhancements to the algorithm (not implemented) are:

   1) For some processors and object formats, there may be benefit in
   aligning the pools to the start of cache lines; this alignment
   would need to be taken into account when calculating addressability
   of a label.  */
/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact, so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode * next;
  Mnode * prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in the table.  */
  rtx value;
  /* The mode of value.  */
  enum machine_mode mode;
  /* The size of the value.  With iWMMXt enabled
     sizes > 4 also imply an alignment of 8-bytes.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *            next;
  rtx               insn;
  HOST_WIDE_INT     address;
  rtx *             loc;
  enum machine_mode mode;
  int               fix_size;
  rtx               value;
  Mnode *           minipool;
  HOST_WIDE_INT     forwards;
  HOST_WIDE_INT     backwards;
};

/* Fixes less than a word need padding out to a word boundary.  */
#define MINIPOOL_FIX_SIZE(mode) \
  (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
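
/* For example (with the usual mode sizes), MINIPOOL_FIX_SIZE (HImode) and
   MINIPOOL_FIX_SIZE (SImode) are both 4, while MINIPOOL_FIX_SIZE (DImode)
   is 8: sub-word entries are padded up to a full word, larger entries keep
   their natural size.  */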
static Mnode *	minipool_vector_head;
static Mnode *	minipool_vector_tail;
static rtx	minipool_vector_label;
static int	minipool_pad;

/* The linked list of all minipool fixes required for this function.  */
Mfix * 		minipool_fix_head;
Mfix * 		minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *		minipool_barrier;
/* Determines if INSN is the start of a jump table.  Returns the end
   of the TABLE or NULL_RTX.  */

static rtx
is_jump_table (rtx insn)
{
  rtx table;

  if (GET_CODE (insn) == JUMP_INSN
      && JUMP_LABEL (insn) != NULL
      && ((table = next_real_insn (JUMP_LABEL (insn)))
	  == next_real_insn (insn))
      && table != NULL
      && GET_CODE (table) == JUMP_INSN
      && (GET_CODE (PATTERN (table)) == ADDR_VEC
	  || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
    return table;

  return NULL_RTX;
}
#ifndef JUMP_TABLES_IN_TEXT_SECTION
#define JUMP_TABLES_IN_TEXT_SECTION 0
#endif

static HOST_WIDE_INT
get_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up size of TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  /* Add two bytes for alignment on Thumb.  */
	  if (TARGET_THUMB)
	    size += 2;
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
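
/* As a worked example: an HImode (TBH-style) ADDR_DIFF_VEC with 10 entries
   occupies 2 * 10 = 20 bytes and needs no padding, while a QImode
   (TBB-style) table with 9 entries is rounded up from 9 to 10 bytes so that
   the following code stays halfword aligned.  */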
10659 /* Move a minipool fix MP from its current location to before MAX_MP.
10660 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
10661 constraints may need updating. */
10663 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
10664 HOST_WIDE_INT max_address
)
10666 /* The code below assumes these are different. */
10667 gcc_assert (mp
!= max_mp
);
10669 if (max_mp
== NULL
)
10671 if (max_address
< mp
->max_address
)
10672 mp
->max_address
= max_address
;
10676 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
10677 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
10679 mp
->max_address
= max_address
;
10681 /* Unlink MP from its current position. Since max_mp is non-null,
10682 mp->prev must be non-null. */
10683 mp
->prev
->next
= mp
->next
;
10684 if (mp
->next
!= NULL
)
10685 mp
->next
->prev
= mp
->prev
;
10687 minipool_vector_tail
= mp
->prev
;
10689 /* Re-insert it before MAX_MP. */
10691 mp
->prev
= max_mp
->prev
;
10694 if (mp
->prev
!= NULL
)
10695 mp
->prev
->next
= mp
;
10697 minipool_vector_head
= mp
;
10700 /* Save the new entry. */
10703 /* Scan over the preceding entries and adjust their addresses as
10705 while (mp
->prev
!= NULL
10706 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
10708 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
10715 /* Add a constant to the minipool for a forward reference. Returns the
10716 node added or NULL if the constant will not fit in this pool. */
10718 add_minipool_forward_ref (Mfix
*fix
)
10720 /* If set, max_mp is the first pool_entry that has a lower
10721 constraint than the one we are trying to add. */
10722 Mnode
* max_mp
= NULL
;
10723 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
10726 /* If the minipool starts before the end of FIX->INSN then this FIX
10727 can not be placed into the current pool. Furthermore, adding the
10728 new constant pool entry may cause the pool to start FIX_SIZE bytes
10730 if (minipool_vector_head
&&
10731 (fix
->address
+ get_attr_length (fix
->insn
)
10732 >= minipool_vector_head
->max_address
- fix
->fix_size
))
10735 /* Scan the pool to see if a constant with the same value has
10736 already been added. While we are doing this, also note the
10737 location where we must insert the constant if it doesn't already
10739 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
10741 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
10742 && fix
->mode
== mp
->mode
10743 && (GET_CODE (fix
->value
) != CODE_LABEL
10744 || (CODE_LABEL_NUMBER (fix
->value
)
10745 == CODE_LABEL_NUMBER (mp
->value
)))
10746 && rtx_equal_p (fix
->value
, mp
->value
))
10748 /* More than one fix references this entry. */
10750 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
10753 /* Note the insertion point if necessary. */
10755 && mp
->max_address
> max_address
)
10758 /* If we are inserting an 8-bytes aligned quantity and
10759 we have not already found an insertion point, then
10760 make sure that all such 8-byte aligned quantities are
10761 placed at the start of the pool. */
10762 if (ARM_DOUBLEWORD_ALIGN
10764 && fix
->fix_size
>= 8
10765 && mp
->fix_size
< 8)
10768 max_address
= mp
->max_address
;
10772 /* The value is not currently in the minipool, so we need to create
10773 a new entry for it. If MAX_MP is NULL, the entry will be put on
10774 the end of the list since the placement is less constrained than
10775 any existing entry. Otherwise, we insert the new fix before
10776 MAX_MP and, if necessary, adjust the constraints on the other
10779 mp
->fix_size
= fix
->fix_size
;
10780 mp
->mode
= fix
->mode
;
10781 mp
->value
= fix
->value
;
10783 /* Not yet required for a backwards ref. */
10784 mp
->min_address
= -65536;
10786 if (max_mp
== NULL
)
10788 mp
->max_address
= max_address
;
10790 mp
->prev
= minipool_vector_tail
;
10792 if (mp
->prev
== NULL
)
10794 minipool_vector_head
= mp
;
10795 minipool_vector_label
= gen_label_rtx ();
10798 mp
->prev
->next
= mp
;
10800 minipool_vector_tail
= mp
;
10804 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
10805 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
10807 mp
->max_address
= max_address
;
10810 mp
->prev
= max_mp
->prev
;
10812 if (mp
->prev
!= NULL
)
10813 mp
->prev
->next
= mp
;
10815 minipool_vector_head
= mp
;
10818 /* Save the new entry. */
10821 /* Scan over the preceding entries and adjust their addresses as
10823 while (mp
->prev
!= NULL
10824 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
10826 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
10834 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
10835 HOST_WIDE_INT min_address
)
10837 HOST_WIDE_INT offset
;
10839 /* The code below assumes these are different. */
10840 gcc_assert (mp
!= min_mp
);
10842 if (min_mp
== NULL
)
10844 if (min_address
> mp
->min_address
)
10845 mp
->min_address
= min_address
;
10849 /* We will adjust this below if it is too loose. */
10850 mp
->min_address
= min_address
;
10852 /* Unlink MP from its current position. Since min_mp is non-null,
10853 mp->next must be non-null. */
10854 mp
->next
->prev
= mp
->prev
;
10855 if (mp
->prev
!= NULL
)
10856 mp
->prev
->next
= mp
->next
;
10858 minipool_vector_head
= mp
->next
;
10860 /* Reinsert it after MIN_MP. */
10862 mp
->next
= min_mp
->next
;
10864 if (mp
->next
!= NULL
)
10865 mp
->next
->prev
= mp
;
10867 minipool_vector_tail
= mp
;
10873 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
10875 mp
->offset
= offset
;
10876 if (mp
->refcount
> 0)
10877 offset
+= mp
->fix_size
;
10879 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
10880 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
10886 /* Add a constant to the minipool for a backward reference. Returns the
10887 node added or NULL if the constant will not fit in this pool.
10889 Note that the code for insertion for a backwards reference can be
10890 somewhat confusing because the calculated offsets for each fix do
10891 not take into account the size of the pool (which is still under
10894 add_minipool_backward_ref (Mfix
*fix
)
10896 /* If set, min_mp is the last pool_entry that has a lower constraint
10897 than the one we are trying to add. */
10898 Mnode
*min_mp
= NULL
;
10899 /* This can be negative, since it is only a constraint. */
10900 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
10903 /* If we can't reach the current pool from this insn, or if we can't
10904 insert this entry at the end of the pool without pushing other
10905 fixes out of range, then we don't try. This ensures that we
10906 can't fail later on. */
10907 if (min_address
>= minipool_barrier
->address
10908 || (minipool_vector_tail
->min_address
+ fix
->fix_size
10909 >= minipool_barrier
->address
))
10912 /* Scan the pool to see if a constant with the same value has
10913 already been added. While we are doing this, also note the
10914 location where we must insert the constant if it doesn't already
10916 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
10918 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
10919 && fix
->mode
== mp
->mode
10920 && (GET_CODE (fix
->value
) != CODE_LABEL
10921 || (CODE_LABEL_NUMBER (fix
->value
)
10922 == CODE_LABEL_NUMBER (mp
->value
)))
10923 && rtx_equal_p (fix
->value
, mp
->value
)
10924 /* Check that there is enough slack to move this entry to the
10925 end of the table (this is conservative). */
10926 && (mp
->max_address
10927 > (minipool_barrier
->address
10928 + minipool_vector_tail
->offset
10929 + minipool_vector_tail
->fix_size
)))
10932 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
10935 if (min_mp
!= NULL
)
10936 mp
->min_address
+= fix
->fix_size
;
10939 /* Note the insertion point if necessary. */
10940 if (mp
->min_address
< min_address
)
10942 /* For now, we do not allow the insertion of 8-byte alignment
10943 requiring nodes anywhere but at the start of the pool. */
10944 if (ARM_DOUBLEWORD_ALIGN
10945 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
10950 else if (mp
->max_address
10951 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
10953 /* Inserting before this entry would push the fix beyond
10954 its maximum address (which can happen if we have
10955 re-located a forwards fix); force the new fix to come
10957 if (ARM_DOUBLEWORD_ALIGN
10958 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
10963 min_address
= mp
->min_address
+ fix
->fix_size
;
10966 /* Do not insert a non-8-byte aligned quantity before 8-byte
10967 aligned quantities. */
10968 else if (ARM_DOUBLEWORD_ALIGN
10969 && fix
->fix_size
< 8
10970 && mp
->fix_size
>= 8)
10973 min_address
= mp
->min_address
+ fix
->fix_size
;
10978 /* We need to create a new entry. */
10980 mp
->fix_size
= fix
->fix_size
;
10981 mp
->mode
= fix
->mode
;
10982 mp
->value
= fix
->value
;
10984 mp
->max_address
= minipool_barrier
->address
+ 65536;
10986 mp
->min_address
= min_address
;
10988 if (min_mp
== NULL
)
10991 mp
->next
= minipool_vector_head
;
10993 if (mp
->next
== NULL
)
10995 minipool_vector_tail
= mp
;
10996 minipool_vector_label
= gen_label_rtx ();
10999 mp
->next
->prev
= mp
;
11001 minipool_vector_head
= mp
;
11005 mp
->next
= min_mp
->next
;
11009 if (mp
->next
!= NULL
)
11010 mp
->next
->prev
= mp
;
11012 minipool_vector_tail
= mp
;
11015 /* Save the new entry. */
11023 /* Scan over the following entries and adjust their offsets. */
11024 while (mp
->next
!= NULL
)
11026 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
11027 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
11030 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
11032 mp
->next
->offset
= mp
->offset
;
/* Assign the offset of each entry in the current minipool, given BARRIER,
   the barrier after which the pool will be emitted.  Entries whose
   refcount has dropped to zero take no space.  */

static void
assign_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}
11057 /* Output the literal table */
11059 dump_minipool (rtx scan
)
11065 if (ARM_DOUBLEWORD_ALIGN
)
11066 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
11067 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
11074 fprintf (dump_file
,
11075 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
11076 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
11078 scan
= emit_label_after (gen_label_rtx (), scan
);
11079 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
11080 scan
= emit_label_after (minipool_vector_label
, scan
);
11082 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
11084 if (mp
->refcount
> 0)
11088 fprintf (dump_file
,
11089 ";; Offset %u, min %ld, max %ld ",
11090 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
11091 (unsigned long) mp
->max_address
);
11092 arm_print_value (dump_file
, mp
->value
);
11093 fputc ('\n', dump_file
);
11096 switch (mp
->fix_size
)
11098 #ifdef HAVE_consttable_1
11100 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
11104 #ifdef HAVE_consttable_2
11106 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
11110 #ifdef HAVE_consttable_4
11112 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
11116 #ifdef HAVE_consttable_8
11118 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
11122 #ifdef HAVE_consttable_16
11124 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
11129 gcc_unreachable ();
11137 minipool_vector_head
= minipool_vector_tail
= NULL
;
11138 scan
= emit_insn_after (gen_consttable_end (), scan
);
11139 scan
= emit_barrier_after (scan
);
/* Return the cost of forcibly inserting a barrier after INSN.  */

static int
arm_barrier_cost (rtx insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label, rather
	 than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}
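
/* In other words, given the costs above the barrier-placement scan prefers
   to split the stream just after an unconditional jump or immediately
   before an existing label, where breaking the flow is cheapest.  */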
/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
{
  HOST_WIDE_INT count = 0;
  rtx barrier;
  rtx from = fix->insn;
  /* The instruction after which we will insert the jump.  */
  rtx selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address;
  Mfix * new_fix;
  HOST_WIDE_INT max_count = max_address - fix->address;
  rtx label = gen_label_rtx ();

  selected_cost = arm_barrier_cost (from);
  selected_address = fix->address;

  while (from && count < max_count)
    {
      rtx tmp;
      int new_cost;

      /* This code shouldn't have been called if there was a natural barrier
	 within range.  */
      gcc_assert (GET_CODE (from) != BARRIER);

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      tmp = is_jump_table (from);
      if (tmp != NULL)
	{
	  count += get_jump_table_size (tmp);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = arm_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = tmp;
	      selected_cost = new_cost;
	      selected_address = fix->address + count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (tmp);
	  continue;
	}

      new_cost = arm_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = fix->address + count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  new_fix->next = fix->next;
  fix->next = new_fix;

  return new_fix;
}
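
/* In short: the loop above walks forward from FIX->insn, pricing every
   candidate location with arm_barrier_cost and keeping the cheapest one
   that is still below MAX_ADDRESS; the pool barrier is then placed there,
   with an unconditional branch emitted around it so execution falls
   through to the label after the pool.  */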
/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */
static void
push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */
static void
push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
		   enum machine_mode mode, rtx value)
{
  Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = get_attr_pool_range (insn);
  fix->backwards = get_attr_neg_pool_range (insn);
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  /* If an entry requires 8-byte alignment then assume all constant pools
     require 4 bytes of padding.  Trying to do this later on a per-pool
     basis is awkward because existing pool entries have to be modified.  */
  if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
    minipool_pad = 4;

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      arm_print_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}
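
/* For example, a DImode or DFmode constant load records a fixup with
   fix_size == 8, which is what makes the ARM_DOUBLEWORD_ALIGN test above
   assume 4 bytes of pool padding for the rest of the function.  */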
/* Return the cost of synthesizing a 64-bit constant VAL inline.
   Returns the number of insns needed, or 99 if we don't know how to
   do it.  */
int
arm_const_double_inline_cost (rtx val)
{
  rtx lowpart, highpart;
  enum machine_mode mode;

  mode = GET_MODE (val);

  if (mode == VOIDmode)
    mode = DImode;

  gcc_assert (GET_MODE_SIZE (mode) == 8);

  lowpart = gen_lowpart (SImode, val);
  highpart = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (lowpart) == CONST_INT);
  gcc_assert (GET_CODE (highpart) == CONST_INT);

  return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
			    NULL_RTX, NULL_RTX, 0, 0)
	  + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
			      NULL_RTX, NULL_RTX, 0, 0));
}
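
/* For instance, a DImode value whose two halves are both small immediates
   (say 0x0000000100000001) costs 2 here: one instruction per 32-bit half.  */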
/* Return true if it is worthwhile to split a 64-bit constant into two
   32-bit operations.  This is the case if optimizing for size, or
   if we have load delay slots, or if one 32-bit part can be done with
   a single data operation.  */
bool
arm_const_double_by_parts (rtx val)
{
  enum machine_mode mode = GET_MODE (val);
  rtx part;

  if (optimize_size || arm_ld_sched)
    return true;

  if (mode == VOIDmode)
    mode = DImode;

  part = gen_highpart_mode (SImode, mode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  part = gen_lowpart (SImode, val);

  gcc_assert (GET_CODE (part) == CONST_INT);

  if (const_ok_for_arm (INTVAL (part))
      || const_ok_for_arm (~INTVAL (part)))
    return true;

  return false;
}
/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.
   This is used by arm_memory_load_p() which needs to know about loads
   of constants that will be converted into minipool loads.  */
static bool
note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
{
  bool result = false;
  int opno;

  extract_insn (insn);

  if (!constrain_operands (1))
    fatal_insn_not_found (insn);

  if (recog_data.n_alternatives == 0)
    return false;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints ();

  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (recog_op_alt[opno][which_alternative].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_minipool_fix (insn, address, recog_data.operand_loc[opno],
				   recog_data.operand_mode[opno], op);
	      result = true;
	    }
	  else if (GET_CODE (op) == MEM
		   && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
		   && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
	    {
	      if (do_pushes)
		{
		  rtx cop = avoid_constant_pool_reference (op);

		  /* Casting the address of something to a mode narrower
		     than a word can cause avoid_constant_pool_reference()
		     to return the pool reference itself.  That's no good to
		     us here.  Let's just hope that we can use the
		     constant pool value directly.  */
		  if (op == cop)
		    cop = get_pool_constant (XEXP (op, 0));

		  push_minipool_fix (insn, address,
				     recog_data.operand_loc[opno],
				     recog_data.operand_mode[opno], cop);
		}

	      result = true;
	    }
	}
    }

  return result;
}
/* Gcc puts the pool in the wrong place for ARM, since we can only
   load addresses a limited distance around the pc.  We do some
   special munging to move the constant pool values to the correct
   point in the code.  */
static void
arm_reorg (void)
{
  rtx insn;
  HOST_WIDE_INT address = 0;
  Mfix * fix;

  minipool_fix_head = minipool_fix_tail = NULL;

  /* The first insn must always be a note, or the code below won't
     scan it properly.  */
  insn = get_insns ();
  gcc_assert (GET_CODE (insn) == NOTE);

  /* Scan all the insns and record the operands that will need fixing.  */
  for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
    {
      if (TARGET_CIRRUS_FIX_INVALID_INSNS
	  && (arm_cirrus_insn_p (insn)
	      || GET_CODE (insn) == JUMP_INSN
	      || arm_memory_load_p (insn)))
	cirrus_reorg (insn);

      if (GET_CODE (insn) == BARRIER)
	push_minipool_barrier (insn, address);
      else if (INSN_P (insn))
	{
	  rtx table;

	  note_invalid_constants (insn, address, true);
	  address += get_attr_length (insn);

	  /* If the insn is a vector jump, add the size of the table
	     and skip the table.  */
	  if ((table = is_jump_table (insn)) != NULL)
	    {
	      address += get_jump_table_size (table);
	      insn = table;
	    }
	}
    }

  fix = minipool_fix_head;

  /* Now scan the fixups and perform the required changes.  */
  while (fix)
    {
      Mfix * ftmp;
      Mfix * fdel;
      Mfix * last_added_fix;
      Mfix * last_barrier = NULL;
      Mfix * this_fix;

      /* Skip any further barriers before the next fix.  */
      while (fix && GET_CODE (fix->insn) == BARRIER)
	fix = fix->next;

      /* No more fixes.  */
      if (fix == NULL)
	break;

      last_added_fix = NULL;

      for (ftmp = fix; ftmp; ftmp = ftmp->next)
	{
	  if (GET_CODE (ftmp->insn) == BARRIER)
	    {
	      if (ftmp->address >= minipool_vector_head->max_address)
		break;

	      last_barrier = ftmp;
	    }
	  else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
	    break;

	  last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	}

      /* If we found a barrier, drop back to that; any fixes that we
	 could have reached but come after the barrier will now go in
	 the next mini-pool.  */
      if (last_barrier != NULL)
	{
	  /* Reduce the refcount for those fixes that won't go into this
	     pool after all.  */
	  for (fdel = last_barrier->next;
	       fdel && fdel != ftmp;
	       fdel = fdel->next)
	    {
	      fdel->minipool->refcount--;
	      fdel->minipool = NULL;
	    }

	  ftmp = last_barrier;
	}
      else
	{
	  /* ftmp is first fix that we can't fit into this pool and
	     there no natural barriers that we could use.  Insert a
	     new barrier in the code somewhere between the previous
	     fix and this one, and arrange to jump around it.  */
	  HOST_WIDE_INT max_address;

	  /* The last item on the list of fixes must be a barrier, so
	     we can never run off the end of the list of fixes without
	     last_barrier being set.  */
	  gcc_assert (ftmp);

	  max_address = minipool_vector_head->max_address;
	  /* Check that there isn't another fix that is in range that
	     we couldn't fit into this pool because the pool was
	     already too large: we need to put the pool before such an
	     instruction.  The pool itself may come just after the
	     fix because create_fix_barrier also allows space for a
	     jump instruction.  */
	  if (ftmp->address < max_address)
	    max_address = ftmp->address + 1;

	  last_barrier = create_fix_barrier (last_added_fix, max_address);
	}

      assign_minipool_offsets (last_barrier);

      while (ftmp)
	{
	  if (GET_CODE (ftmp->insn) != BARRIER
	      && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
		  == NULL))
	    break;

	  ftmp = ftmp->next;
	}

      /* Scan over the fixes we have identified for this pool, fixing them
	 up and adding the constants to the pool itself.  */
      for (this_fix = fix; this_fix && ftmp != this_fix;
	   this_fix = this_fix->next)
	if (GET_CODE (this_fix->insn) != BARRIER)
	  {
	    rtx addr
	      = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
						  minipool_vector_label),
			       this_fix->minipool->offset);
	    *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
	  }

      dump_minipool (last_barrier->insn);
      fix = ftmp;
    }

  /* From now on we must synthesize any constants that we can't handle
     directly.  This can happen if the RTL gets split during final
     instruction generation.  */
  after_arm_reorg = 1;

  /* Free the minipool memory.  */
  obstack_free (&minipool_obstack, minipool_startobj);
}
/* Routines to output assembly language.  */

/* If the rtx is the correct value then return the string of the number.
   In this way we can ensure that valid double constants are generated even
   when cross compiling.  */
const char *
fp_immediate_constant (rtx x)
{
  REAL_VALUE_TYPE r;
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}
/* As for fp_immediate_constant, but value is passed directly, not in rtx.  */
static const char *
fp_const_from_val (REAL_VALUE_TYPE *r)
{
  int i;

  if (!fp_consts_inited)
    init_fp_table ();

  for (i = 0; i < 8; i++)
    if (REAL_VALUES_EQUAL (*r, values_fp[i]))
      return strings_fp[i];

  gcc_unreachable ();
}
/* Output the operands of a LDM/STM instruction to STREAM.
   MASK is the ARM register set mask of which only bits 0-15 are important.
   REG is the base register, either the frame pointer or the stack pointer,
   INSTR is the possibly suffixed load or store instruction.
   RFE is nonzero if the instruction should also copy spsr to cpsr.  */

static void
print_multi_reg (FILE *stream, const char *instr, unsigned reg,
		 unsigned long mask, int rfe)
{
  unsigned i;
  bool not_first = FALSE;

  gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
  fputc ('\t', stream);
  asm_fprintf (stream, instr, reg);
  fputc ('{', stream);

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))
      {
	if (not_first)
	  fprintf (stream, ", ");

	asm_fprintf (stream, "%r", i);
	not_first = TRUE;
      }

  if (rfe)
    fprintf (stream, "}^\n");
  else
    fprintf (stream, "}\n");
}
/* Output a FLDMD instruction to STREAM.
   BASE is the register containing the address.
   REG and COUNT specify the register range.
   Extra registers may be added to avoid hardware bugs.

   We output FLDMD even for ARMv5 VFP implementations.  Although
   FLDMD is technically not supported until ARMv6, it is believed
   that all VFP implementations support its use in this context.  */

static void
vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
{
  int i;

  /* Workaround ARM10 VFPr1 bug.  */
  if (count == 2 && !arm_arch6)
    {
      if (reg == 15)
	reg--;
      count++;
    }

  /* FLDMD may not load more than 16 doubleword registers at a time.  Split the
     load into multiple parts if we have to handle more than 16 registers.  */
  if (count > 16)
    {
      vfp_output_fldmd (stream, base, reg, 16);
      vfp_output_fldmd (stream, base, reg + 16, count - 16);
      return;
    }

  fputc ('\t', stream);
  asm_fprintf (stream, "fldmfdd\t%r!, {", base);

  for (i = reg; i < reg + count; i++)
    {
      if (i > reg)
	fputs (", ", stream);
      asm_fprintf (stream, "d%d", i);
    }

  fputs ("}\n", stream);
}
/* Output the assembly for a store multiple.  */

const char *
vfp_output_fstmd (rtx * operands)
{
  char pattern[100];
  int p;
  int base;
  int i;

  strcpy (pattern, "fstmfdd\t%m0!, {%P1");
  p = strlen (pattern);

  gcc_assert (GET_CODE (operands[1]) == REG);

  base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
  for (i = 1; i < XVECLEN (operands[2], 0); i++)
    {
      p += sprintf (&pattern[p], ", d%d", base + i);
    }
  strcpy (&pattern[p], "}");

  output_asm_insn (pattern, operands);
  return "";
}
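
/* For a two-register store starting at d8 the template built above is
   "fstmfdd\t%m0!, {%P1, d9}", which prints as something like
   "fstmfdd sp!, {d8, d9}" once the operands are substituted.  */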
/* Emit RTL to save block of VFP register pairs to the stack.  Returns the
   number of bytes pushed.  */

static int
vfp_emit_fstmd (int base_reg, int count)
{
  rtx par;
  rtx dwarf;
  rtx tmp, reg;
  int i;

  /* Workaround ARM10 VFPr1 bug.  Data corruption can occur when exactly two
     register pairs are stored by a store multiple insn.  We avoid this
     by pushing an extra pair.  */
  if (count == 2 && !arm_arch6)
    {
      if (base_reg == LAST_VFP_REGNUM - 3)
	base_reg -= 2;
      count++;
    }

  /* FSTMD may not store more than 16 doubleword registers at once.  Split
     larger stores into multiple parts (up to a maximum of two, in
     practice).  */
  if (count > 16)
    {
      int saved;
      /* NOTE: base_reg is an internal register number, so each D register
	 counts as 2.  */
      saved = vfp_emit_fstmd (base_reg + 32, count - 16);
      saved += vfp_emit_fstmd (base_reg, 16);
      return saved;
    }

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (DFmode, base_reg);
  base_reg += 2;

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
		   gen_frame_mem (BLKmode,
				  gen_rtx_PRE_MODIFY (Pmode,
						      stack_pointer_rtx,
						      plus_constant
						      (stack_pointer_rtx,
						       - (count * 8)))),
		   gen_rtx_UNSPEC (BLKmode,
				   gen_rtvec (1, reg),
				   UNSPEC_PUSH_MULT));

  tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
		     plus_constant (stack_pointer_rtx, -(count * 8)));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
		     gen_frame_mem (DFmode, stack_pointer_rtx),
		     reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
    {
      reg = gen_rtx_REG (DFmode, base_reg);
      base_reg += 2;
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
			 gen_frame_mem (DFmode,
					plus_constant (stack_pointer_rtx,
						       i * 8)),
			 reg);
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;
    }

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return count * 8;
}
/* Emit a call instruction with pattern PAT.  ADDR is the address of
   the call target.  */

void
arm_emit_call_insn (rtx pat, rtx addr)
{
  rtx insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.
     If the call might use such an entry, add a use of the PIC register
     to the instruction's CALL_INSN_FUNCTION_USAGE.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
	  ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
	  : !SYMBOL_REF_LOCAL_P (addr)))
    {
      require_pic_register ();
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
    }
}
/* Output a 'call' insn.  */
const char *
output_call (rtx *operands)
{
  gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly.  */

  /* Handle calls to lr using ip (which may be clobbered in subr anyway).  */
  if (REGNO (operands[0]) == LR_REGNUM)
    {
      operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
      output_asm_insn ("mov%?\t%0, %|lr", operands);
    }

  output_asm_insn ("mov%?\t%|lr, %|pc", operands);

  if (TARGET_INTERWORK || arm_arch4t)
    output_asm_insn ("bx%?\t%0", operands);
  else
    output_asm_insn ("mov%?\t%|pc, %0", operands);

  return "";
}
/* Output a 'call' insn that is a reference in memory.  This is
   disabled for ARMv5 and we prefer a blx instead because otherwise
   there's a significant performance overhead.  */
const char *
output_call_mem (rtx *operands)
{
  gcc_assert (!arm_arch5);
  if (TARGET_INTERWORK)
    {
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("bx%?\t%|ip", operands);
    }
  else if (regno_use_in (LR_REGNUM, operands[0]))
    {
      /* LR is used in the memory address.  We load the address in the
	 first instruction.  It's safe to use IP as the target of the
	 load since the call will kill it anyway.  */
      output_asm_insn ("ldr%?\t%|ip, %0", operands);
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      if (arm_arch4t)
	output_asm_insn ("bx%?\t%|ip", operands);
      else
	output_asm_insn ("mov%?\t%|pc, %|ip", operands);
    }
  else
    {
      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
      output_asm_insn ("ldr%?\t%|pc, %0", operands);
    }

  return "";
}
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_long_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
  output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);

  return "";
}
/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_long_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[3];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);

  output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);

  return "";
}
/* Output a move from arm registers to arm registers of a long double.
   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.  */
const char *
output_mov_long_double_arm_from_arm (rtx *operands)
{
  /* We have to be careful here because the two might overlap.  */
  int dest_start = REGNO (operands[0]);
  int src_start = REGNO (operands[1]);
  rtx ops[2];
  int i;

  if (dest_start < src_start)
    {
      for (i = 0; i < 3; i++)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }
  else
    {
      for (i = 2; i >= 0; i--)
	{
	  ops[0] = gen_rtx_REG (SImode, dest_start + i);
	  ops[1] = gen_rtx_REG (SImode, src_start + i);
	  output_asm_insn ("mov%?\t%0, %1", ops);
	}
    }

  return "";
}
void
arm_emit_movpair (rtx dest, rtx src)
{
  /* If the src is an immediate, simplify it.  */
  if (CONST_INT_P (src))
    {
      HOST_WIDE_INT val = INTVAL (src);
      emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
      if ((val >> 16) & 0x0000ffff)
	emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
					     GEN_INT (16)),
		       GEN_INT ((val >> 16) & 0x0000ffff));
      return;
    }

  emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
  emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
}
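
/* So for a constant such as 0x12345678 this emits the low half 0x5678
   first and then writes 0x1234 into bits 16-31 via the ZERO_EXTRACT
   (a movw/movt style pair); symbolic sources go through HIGH/LO_SUM.  */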
/* Output a move from arm registers to an fpa register.
   OPERANDS[0] is an fpa register.
   OPERANDS[1] is the first register of an arm register pair.  */
const char *
output_mov_double_fpa_from_arm (rtx *operands)
{
  int arm_reg0 = REGNO (operands[1]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
  output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
  return "";
}
/* Output a move from an fpa register to arm registers.
   OPERANDS[0] is the first register of an arm register pair.
   OPERANDS[1] is an fpa register.  */
const char *
output_mov_double_arm_from_fpa (rtx *operands)
{
  int arm_reg0 = REGNO (operands[0]);
  rtx ops[2];

  gcc_assert (arm_reg0 != IP_REGNUM);

  ops[0] = gen_rtx_REG (SImode, arm_reg0);
  ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
  output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
  output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
  return "";
}
/* Output a move between double words.
   It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
   or MEM<-REG and all MEMs must be offsettable addresses.  */
const char *
output_move_double (rtx *operands)
{
  enum rtx_code code0 = GET_CODE (operands[0]);
  enum rtx_code code1 = GET_CODE (operands[1]);
  rtx otherops[3];

  if (code0 == REG)
    {
      unsigned int reg0 = REGNO (operands[0]);

      otherops[0] = gen_rtx_REG (SImode, 1 + reg0);

      gcc_assert (code1 == MEM);  /* Constraints should ensure this.  */

      switch (GET_CODE (XEXP (operands[1], 0)))
	{
	case REG:
	  if (TARGET_LDRD
	      && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
	    output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
	  else
	    output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (TARGET_LDRD)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
	  else
	    output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
	  break;

	case POST_INC:
	  if (TARGET_LDRD)
	    output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
	  else
	    output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  /* Autoincrement addressing modes should never have overlapping
	     base and destination registers, and overlapping index registers
	     are already prohibited, so this doesn't need to worry about
	     fix_cm3_ldrd.  */
	  otherops[0] = operands[0];
	  otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);

	  if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
	    {
	      if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
		{
		  /* Registers overlap so split out the increment.  */
		  output_asm_insn ("add%?\t%1, %1, %2", otherops);
		  output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
		}
	      else
		{
		  /* Use a single insn if we can.
		     FIXME: IWMMXT allows offsets larger than ldrd can
		     handle, fix these up with a pair of ldr.  */
		  if (TARGET_THUMB2
		      || GET_CODE (otherops[2]) != CONST_INT
		      || (INTVAL (otherops[2]) > -256
			  && INTVAL (otherops[2]) < 256))
		    output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
		  else
		    {
		      output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
		      output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		    }
		}
	    }
	  else
	    {
	      /* Use a single insn if we can.
		 FIXME: IWMMXT allows offsets larger than ldrd can handle,
		 fix these up with a pair of ldr.  */
	      if (TARGET_THUMB2
		  || GET_CODE (otherops[2]) != CONST_INT
		  || (INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256))
		output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
	      else
		{
		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		}
	    }
	  break;

	case LABEL_REF:
	case CONST:
	  /* We might be able to use ldrd %0, %1 here.  However the range is
	     different to ldr/adr, and it is broken on some ARMv7-M
	     implementations.  */
	  /* Use the second register of the pair to avoid problematic
	     conditionals.  */
	  otherops[1] = operands[1];
	  output_asm_insn ("adr%?\t%0, %1", otherops);
	  operands[1] = otherops[0];
	  if (TARGET_LDRD)
	    output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
	  else
	    output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
	  break;

	  /* ??? This needs checking for thumb2.  */
	default:
	  if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
			       GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
	    {
	      otherops[0] = operands[0];
	      otherops[1] = XEXP (XEXP (operands[1], 0), 0);
	      otherops[2] = XEXP (XEXP (operands[1], 0), 1);

	      if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
		{
		  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
		    {
		      switch ((int) INTVAL (otherops[2]))
			{
			case -8:
			  output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
			  return "";
			case -4:
			  if (TARGET_THUMB2)
			    break;
			  output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
			  return "";
			case 4:
			  if (TARGET_THUMB2)
			    break;
			  output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
			  return "";
			}
		    }
		  otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
		  operands[1] = otherops[0];
		  if (TARGET_LDRD
		      && (GET_CODE (otherops[2]) == REG
			  || TARGET_THUMB2
			  || (GET_CODE (otherops[2]) == CONST_INT
			      && INTVAL (otherops[2]) > -256
			      && INTVAL (otherops[2]) < 256)))
		    {
		      if (reg_overlap_mentioned_p (operands[0],
						   otherops[2]))
			{
			  rtx tmp;
			  /* Swap base and index registers over to
			     avoid a conflict.  */
			  tmp = otherops[1];
			  otherops[1] = otherops[2];
			  otherops[2] = tmp;
			}
		      /* If both registers conflict, it will usually
			 have been fixed by a splitter.  */
		      if (reg_overlap_mentioned_p (operands[0], otherops[2])
			  || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
			{
			  output_asm_insn ("add%?\t%0, %1, %2", otherops);
			  output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
			}
		      else
			{
			  otherops[0] = operands[0];
			  output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
			}
		      return "";
		    }

		  if (GET_CODE (otherops[2]) == CONST_INT)
		    {
		      if (!(const_ok_for_arm (INTVAL (otherops[2]))))
			output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
		      else
			output_asm_insn ("add%?\t%0, %1, %2", otherops);
		    }
		  else
		    output_asm_insn ("add%?\t%0, %1, %2", otherops);
		}
	      else
		output_asm_insn ("sub%?\t%0, %1, %2", otherops);

	      if (TARGET_LDRD)
		return "ldr%(d%)\t%0, [%1]";

	      return "ldm%(ia%)\t%1, %M0";
	    }
	  else
	    {
	      otherops[1] = adjust_address (operands[1], SImode, 4);
	      /* Take care of overlapping base/data reg.  */
	      if (reg_mentioned_p (operands[0], operands[1]))
		{
		  output_asm_insn ("ldr%?\t%0, %1", otherops);
		  output_asm_insn ("ldr%?\t%0, %1", operands);
		}
	      else
		{
		  output_asm_insn ("ldr%?\t%0, %1", operands);
		  output_asm_insn ("ldr%?\t%0, %1", otherops);
		}
	    }
	}
    }
  else
    {
      /* Constraints should ensure this.  */
      gcc_assert (code0 == MEM && code1 == REG);
      gcc_assert (REGNO (operands[1]) != IP_REGNUM);

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  if (TARGET_LDRD)
	    output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
	  else
	    output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	case PRE_INC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
	  break;

	case PRE_DEC:
	  if (TARGET_LDRD)
	    output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
	  else
	    output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
	  break;

	case POST_INC:
	  if (TARGET_LDRD)
	    output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
	  else
	    output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
	  break;

	case POST_DEC:
	  gcc_assert (TARGET_LDRD);
	  output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
	  break;

	case PRE_MODIFY:
	case POST_MODIFY:
	  otherops[0] = operands[1];
	  otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
	  otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);

	  /* IWMMXT allows offsets larger than ldrd can handle,
	     fix these up with a pair of ldr.  */
	  if (!TARGET_THUMB2
	      && GET_CODE (otherops[2]) == CONST_INT
	      && (INTVAL(otherops[2]) <= -256
		  || INTVAL(otherops[2]) >= 256))
	    {
	      if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
		{
		  output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		}
	      else
		{
		  output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
		  output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
		}
	    }
	  else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
	    output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
	  else
	    output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
	  break;

	case PLUS:
	  otherops[2] = XEXP (XEXP (operands[0], 0), 1);
	  if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
	    {
	      switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
		{
		case -8:
		  output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
		  return "";

		case -4:
		  if (TARGET_THUMB2)
		    break;
		  output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
		  return "";

		case 4:
		  if (TARGET_THUMB2)
		    break;
		  output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
		  return "";
		}
	    }
	  if (TARGET_LDRD
	      && (GET_CODE (otherops[2]) == REG
		  || TARGET_THUMB2
		  || (GET_CODE (otherops[2]) == CONST_INT
		      && INTVAL (otherops[2]) > -256
		      && INTVAL (otherops[2]) < 256)))
	    {
	      otherops[0] = operands[1];
	      otherops[1] = XEXP (XEXP (operands[0], 0), 0);
	      output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
	      return "";
	    }
	  /* Fall through */

	default:
	  otherops[0] = adjust_address (operands[0], SImode, 4);
	  otherops[1] = operands[1];
	  output_asm_insn ("str%?\t%1, %0", operands);
	  output_asm_insn ("str%?\t%H1, %0", otherops);
	}
    }

  return "";
}
/* Output a move, load or store for quad-word vectors in ARM registers.  Only
   handles MEMs accepted by neon_vector_mem_operand with TYPE=1.  */
const char *
output_move_quad (rtx *operands)
{
  if (REG_P (operands[0]))
    {
      /* Load, or reg->reg move.  */

      if (MEM_P (operands[1]))
	{
	  switch (GET_CODE (XEXP (operands[1], 0)))
	    {
	    case REG:
	      output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
	      break;

	    case LABEL_REF:
	    case CONST:
	      output_asm_insn ("adr%?\t%0, %1", operands);
	      output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
	      break;

	    default:
	      gcc_unreachable ();
	    }
	}
      else
	{
	  rtx ops[2];
	  int dest, src, i;

	  gcc_assert (REG_P (operands[1]));

	  dest = REGNO (operands[0]);
	  src = REGNO (operands[1]);

	  /* This seems pretty dumb, but hopefully GCC won't try to do it
	     very often.  */
	  if (dest < src)
	    for (i = 0; i < 4; i++)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	  else
	    for (i = 3; i >= 0; i--)
	      {
		ops[0] = gen_rtx_REG (SImode, dest + i);
		ops[1] = gen_rtx_REG (SImode, src + i);
		output_asm_insn ("mov%?\t%0, %1", ops);
	      }
	}
    }
  else
    {
      gcc_assert (MEM_P (operands[0]));
      gcc_assert (REG_P (operands[1]));
      gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));

      switch (GET_CODE (XEXP (operands[0], 0)))
	{
	case REG:
	  output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return "";
}
/* Output a VFP load or store instruction.  */
const char *
output_move_vfp (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int load = REG_P (operands[0]);
  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
  gcc_assert (mode == SFmode
	      || mode == DFmode
	      || mode == SImode
	      || mode == DImode
	      || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  switch (GET_CODE (addr))
    {
    case PRE_DEC:
      templ = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_INC:
      templ = "f%smia%c%%?\t%%0!, {%%%s1}%s";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    default:
      templ = "f%s%c%%?\t%%%s0, %%1%s";
      ops[0] = reg;
      ops[1] = mem;
      break;
    }

  sprintf (buff, templ,
	   load ? "ld" : "st",
	   dp ? 'd' : 's',
	   dp ? "P" : "",
	   integer_p ? "\t%@ int" : "");
  output_asm_insn (buff, ops);

  return "";
}
/* Output a Neon quad-word load or store, or a load or store for
   larger structure modes.

   WARNING: The ordering of elements is weird in big-endian mode,
   because we use VSTM, as required by the EABI.  GCC RTL defines
   element ordering based on in-memory order.  This can differ
   from the architectural ordering of elements within a NEON register.
   The intrinsics defined in arm_neon.h use the NEON register element
   ordering, not the GCC RTL element ordering.

   For example, the in-memory ordering of a big-endian quadword
   vector with 16-bit elements when stored from register pair {d0,d1}
   will be (lowest address first, d0[N] is NEON register element N):

     [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]

   When necessary, quadword registers (dN, dN+1) are moved to ARM
   registers from rN in the order:

     dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)

   So that STM/LDM can be used on vectors in ARM registers, and the
   same memory layout will result as if VSTM/VLDM were used.  */

const char *
output_move_neon (rtx *operands)
{
  rtx reg, mem, addr, ops[2];
  int regno, load = REG_P (operands[0]);
  const char *templ;
  char buff[50];
  enum machine_mode mode;

  reg = operands[!load];
  mem = operands[load];

  mode = GET_MODE (reg);

  gcc_assert (REG_P (reg));
  regno = REGNO (reg);
  gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
	      || NEON_REGNO_OK_FOR_QUAD (regno));
  gcc_assert (VALID_NEON_DREG_MODE (mode)
	      || VALID_NEON_QREG_MODE (mode)
	      || VALID_NEON_STRUCT_MODE (mode));
  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip off const from addresses like (const (plus (...))).  */
  if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case POST_INC:
      templ = "v%smia%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case PRE_DEC:
      /* FIXME: We should be using vld1/vst1 here in BE mode?  */
      templ = "v%smdb%%?\t%%0!, %%h1";
      ops[0] = XEXP (addr, 0);
      ops[1] = reg;
      break;

    case POST_MODIFY:
      /* FIXME: Not currently enabled in neon_vector_mem_operand.  */
      gcc_unreachable ();

    case LABEL_REF:
    case PLUS:
      {
	int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
	int overlap = -1;
	int i;

	for (i = 0; i < nregs; i++)
	  {
	    /* We're only using DImode here because it's a convenient size.  */
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
	    ops[1] = adjust_address (mem, DImode, 8 * i);
	    if (reg_overlap_mentioned_p (ops[0], mem))
	      {
		gcc_assert (overlap == -1);
		overlap = i;
	      }
	    else
	      {
		sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
		output_asm_insn (buff, ops);
	      }
	  }
	if (overlap != -1)
	  {
	    ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
	    ops[1] = adjust_address (mem, SImode, 8 * overlap);
	    sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
	    output_asm_insn (buff, ops);
	  }

	return "";
      }

    default:
      templ = "v%smia%%?\t%%m0, %%h1";
      ops[0] = mem;
      ops[1] = reg;
    }

  sprintf (buff, templ, load ? "ld" : "st");
  output_asm_insn (buff, ops);

  return "";
}
/* Output an ADD r, s, #n where n may be too big for one instruction.
   If adding zero to one register, output nothing.  */
const char *
output_add_immediate (rtx *operands)
{
  HOST_WIDE_INT n = INTVAL (operands[2]);

  if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
    {
      if (n < 0)
	output_multi_immediate (operands,
				"sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
				-n);
      else
	output_multi_immediate (operands,
				"add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
				n);
    }

  return "";
}
/* Output a multiple immediate operation.
   OPERANDS is the vector of operands referred to in the output patterns.
   INSTR1 is the output pattern to use for the first constant.
   INSTR2 is the output pattern to use for subsequent constants.
   IMMED_OP is the index of the constant slot in OPERANDS.
   N is the constant value.  */
static const char *
output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
			int immed_op, HOST_WIDE_INT n)
{
#if HOST_BITS_PER_WIDE_INT > 32
  n &= 0xffffffff;
#endif

  if (n == 0)
    {
      /* Quick and easy output.  */
      operands[immed_op] = const0_rtx;
      output_asm_insn (instr1, operands);
    }
  else
    {
      int i;
      const char * instr = instr1;

      /* Note that n is never zero here (which would give no output).  */
      for (i = 0; i < 32; i += 2)
	{
	  if (n & (3 << i))
	    {
	      operands[immed_op] = GEN_INT (n & (255 << i));
	      output_asm_insn (instr, operands);
	      instr = instr2;
	      i += 6;
	    }
	}
    }

  return "";
}
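
/* Worked example: N = 0x10f00 is emitted as two instructions, the first
   using INSTR1 with the chunk 0xf00 and the second using INSTR2 with the
   chunk 0x10000; each chunk is a byte at an even rotation and therefore a
   valid ARM immediate.  */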
/* Return the name of a shifter operation.  */
static const char *
arm_shift_nmem (enum rtx_code code)
{
  switch (code)
    {
    case ASHIFT:   return ARM_LSL_NAME;
    case ASHIFTRT: return "asr";
    case LSHIFTRT: return "lsr";
    case ROTATERT: return "ror";
    default:       gcc_unreachable ();
    }
}
/* Return the appropriate ARM instruction for the operation code.
   The returned result should not be overwritten.  OP is the rtx of the
   operation.  SHIFT_FIRST_ARG is TRUE if the first argument of the operator
   was shifted.  */
const char *
arithmetic_instr (rtx op, int shift_first_arg)
{
  switch (GET_CODE (op))
    {
    case PLUS:  return "add";
    case MINUS: return shift_first_arg ? "rsb" : "sub";
    case IOR:   return "orr";
    case XOR:   return "eor";
    case AND:   return "and";
    case ASHIFT: case ASHIFTRT: case LSHIFTRT: case ROTATERT:
      return arm_shift_nmem (GET_CODE (op));
    default:
      gcc_unreachable ();
    }
}
/* Ensure valid constant shifts and return the appropriate shift mnemonic
   for the operation code.  The returned result should not be overwritten.
   OP is the rtx code of the shift.
   On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
   shift.  */
static const char *
shift_op (rtx op, HOST_WIDE_INT *amountp)
{
  const char * mnem;
  enum rtx_code code = GET_CODE (op);

  switch (GET_CODE (XEXP (op, 1)))
    {
    case REG:
    case SUBREG:
      *amountp = -1;
      break;

    case CONST_INT:
      *amountp = INTVAL (XEXP (op, 1));
      break;

    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case ROTATE:
      gcc_assert (*amountp != -1);
      *amountp = 32 - *amountp;
      code = ROTATERT;

      /* Fall through.  */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      mnem = arm_shift_nmem (code);
      break;

    case MULT:
      /* We never have to worry about the amount being other than a
	 power of 2, since this case can never be reloaded from a reg.  */
      gcc_assert (*amountp != -1);
      *amountp = int_log2 (*amountp);
      return ARM_LSL_NAME;

    default:
      gcc_unreachable ();
    }

  if (*amountp != -1)
    {
      /* This is not 100% correct, but follows from the desire to merge
	 multiplication by a power of 2 with the recognizer for a
	 shift.  >=32 is not a valid shift for "lsl", so we must try and
	 output a shift that produces the correct arithmetical result.
	 Using lsr #32 is identical except for the fact that the carry bit
	 is not set correctly if we set the flags; but we never use the
	 carry bit from such an operation, so we can ignore that.  */
      if (code == ROTATERT)
	/* Rotate is just modulo 32.  */
	*amountp &= 31;
      else if (*amountp != (*amountp & 31))
	{
	  if (code == ASHIFT)
	    mnem = "lsr";
	  *amountp = 32;
	}

      /* Shifts of 0 are no-ops.  */
      if (*amountp == 0)
	return NULL;
    }

  return mnem;
}
/* Obtain the shift from the POWER of two.  */

static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT power)
{
  HOST_WIDE_INT shift = 0;

  while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
    {
      gcc_assert (shift <= 31);
      shift++;
    }

  return shift;
}
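
/* E.g. int_log2 (8) is 3; shift_op uses this to rewrite a multiplication
   by a power of two as the equivalent lsl shift count.  */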
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
/* Compute the register save mask for registers 0 through 12
   inclusive.  This code is used by arm_compute_save_reg_mask.  */

static unsigned long
arm_compute_save_reg0_reg12_mask (void)
{
  unsigned long func_type = arm_current_func_type ();
  unsigned long save_reg_mask = 0;
  unsigned int reg;

  if (IS_INTERRUPT (func_type))
    {
      unsigned int max_reg;
      /* Interrupt functions must not corrupt any registers,
	 even call clobbered ones.  If this is a leaf function
	 we can just examine the registers used by the RTL, but
	 otherwise we have to assume that whatever function is
	 called might clobber anything, and so we have to save
	 all the call-clobbered registers as well.  */
      if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
	/* FIQ handlers have registers r8 - r12 banked, so
	   we only need to check r0 - r7.  Normal ISRs only
	   bank r14 and r15, so we must check up to r12.
	   r13 is the stack pointer which is always preserved,
	   so we do not need to consider it here.  */
	max_reg = 7;
      else
	max_reg = 12;

      for (reg = 0; reg <= max_reg; reg++)
	if (df_regs_ever_live_p (reg)
	    || (! current_function_is_leaf && call_used_regs[reg]))
	  save_reg_mask |= (1 << reg);

      /* Also save the pic base register if necessary.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && crtl->uses_pic_offset_table)
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
    }
  else if (IS_VOLATILE(func_type))
    {
      /* For noreturn functions we historically omitted register saves
	 altogether.  However this really messes up debugging.  As a
	 compromise save just the frame pointers.  Combined with the link
	 register saved elsewhere this should be sufficient to get
	 a backtrace.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
      if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
	save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
    }
  else
    {
      /* In the normal case we only need to save those registers
	 which are call saved and which are used by this function.  */
      for (reg = 0; reg <= 11; reg++)
	if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
	  save_reg_mask |= (1 << reg);

      /* Handle the frame pointer as a special case.  */
      if (frame_pointer_needed)
	save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;

      /* If we aren't loading the PIC register,
	 don't stack it even though it may be live.  */
      if (flag_pic
	  && !TARGET_SINGLE_PIC_BASE
	  && arm_pic_register != INVALID_REGNUM
	  && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
	      || crtl->uses_pic_offset_table))
	save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

      /* The prologue will copy SP into R0, so save it.  */
      if (IS_STACKALIGN (func_type))
	save_reg_mask |= 1;
    }

  /* Save registers so the exception handler can modify them.  */
  if (crtl->calls_eh_return)
    {
      unsigned int i;

      for (i = 0; ; i++)
	{
	  reg = EH_RETURN_DATA_REGNO (i);
	  if (reg == INVALID_REGNUM)
	    break;
	  save_reg_mask |= 1 << reg;
	}
    }

  return save_reg_mask;
}
/* Compute the number of bytes used to store the static chain register on the
   stack, above the stack frame.  We need to know this accurately to get the
   alignment of the rest of the stack frame correct.  */

static int arm_compute_static_chain_stack_bytes (void)
{
  unsigned long func_type = arm_current_func_type ();
  int static_chain_stack_bytes = 0;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
      IS_NESTED (func_type) &&
      df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
    static_chain_stack_bytes = 4;

  return static_chain_stack_bytes;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.
   This is used by arm_get_frame_offsets, which may add extra registers.  */

static unsigned long
arm_compute_save_reg_mask (void)
{
  unsigned int save_reg_mask = 0;
  unsigned long func_type = arm_current_func_type ();
  unsigned int reg;

  if (IS_NAKED (func_type))
    /* This should never really happen.  */
    return 0;

  /* If we are creating a stack frame, then we must save the frame pointer,
     IP (which will hold the old stack pointer), LR and the PC.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
    save_reg_mask |=
      (1 << ARM_HARD_FRAME_POINTER_REGNUM)
      | (1 << IP_REGNUM)
      | (1 << LR_REGNUM)
      | (1 << PC_REGNUM);

  save_reg_mask |= arm_compute_save_reg0_reg12_mask ();

  /* Decide if we need to save the link register.
     Interrupt routines have their own banked link register,
     so they never need to save it.
     Otherwise if we do not use the link register we do not need to save
     it.  If we are pushing other registers onto the stack however, we
     can save an instruction in the epilogue by pushing the link register
     now and then popping it back into the PC.  This incurs extra memory
     accesses though, so we only do it when optimizing for size, and only
     if we know that we will not need a fancy return sequence.  */
  if (df_regs_ever_live_p (LR_REGNUM)
      || (save_reg_mask
	  && optimize_size
	  && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
	  && !crtl->calls_eh_return))
    save_reg_mask |= 1 << LR_REGNUM;

  if (cfun->machine->lr_save_eliminated)
    save_reg_mask &= ~ (1 << LR_REGNUM);

  if (TARGET_REALLY_IWMMXT
      && ((bit_count (save_reg_mask)
	   + ARM_NUM_INTS (crtl->args.pretend_args_size +
			   arm_compute_static_chain_stack_bytes())
	   ) % 2) != 0)
    {
      /* The total number of registers that are going to be pushed
	 onto the stack is odd.  We need to ensure that the stack
	 is 64-bit aligned before we start to save iWMMXt registers,
	 and also before we start to create locals.  (A local variable
	 might be a double or long long which we will load/store using
	 an iWMMXt instruction).  Therefore we need to push another
	 ARM register, so that the stack will be 64-bit aligned.  We
	 try to avoid using the arg registers (r0 - r3) as they might be
	 used to pass values in a tail call.  */
      for (reg = 4; reg <= 12; reg++)
	if ((save_reg_mask & (1 << reg)) == 0)
	  break;

      if (reg <= 12)
	save_reg_mask |= (1 << reg);
      else
	{
	  cfun->machine->sibcall_blocked = 1;
	  save_reg_mask |= (1 << 3);
	}
    }

  /* We may need to push an additional register for use initializing the
     PIC base register.  */
  if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
      && (save_reg_mask & THUMB2_WORK_REGS) == 0)
    {
      reg = thumb_find_work_register (1 << 4);
      if (!call_used_regs[reg])
	save_reg_mask |= (1 << reg);
    }

  return save_reg_mask;
}
/* Compute a bit mask of which registers need to be
   saved on the stack for the current function.  */
static unsigned long
thumb1_compute_save_reg_mask (void)
{
  unsigned long mask;
  unsigned reg;

  mask = 0;
  for (reg = 0; reg < 12; reg ++)
    if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
      mask |= 1 << reg;

  if (flag_pic
      && !TARGET_SINGLE_PIC_BASE
      && arm_pic_register != INVALID_REGNUM
      && crtl->uses_pic_offset_table)
    mask |= 1 << PIC_OFFSET_TABLE_REGNUM;

  /* See if we might need r11 for calls to _interwork_r11_call_via_rN().  */
  if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
    mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;

  /* LR will also be pushed if any lo regs are pushed.  */
  if (mask & 0xff || thumb_force_lr_save ())
    mask |= (1 << LR_REGNUM);

  /* Make sure we have a low work register if we need one.
     We will need one if we are going to push a high register,
     but we are not currently intending to push a low register.  */
  if ((mask & 0xff) == 0
      && ((mask & 0x0f00) || TARGET_BACKTRACE))
    {
      /* Use thumb_find_work_register to choose which register
	 we will use.  If the register is live then we will
	 have to push it.  Use LAST_LO_REGNUM as our fallback
	 choice for the register to select.  */
      reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
      /* Make sure the register returned by thumb_find_work_register is
	 not part of the return value.  */
      if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
	reg = LAST_LO_REGNUM;

      if (! call_used_regs[reg])
	mask |= 1 << reg;
    }

  /* The 504 below is 8 bytes less than 512 because there are two possible
     alignment words.  We can't tell here if they will be present or not so we
     have to play it safe and assume that they are.  */
  if ((CALLER_INTERWORKING_SLOT_SIZE +
       ROUND_UP_WORD (get_frame_size ()) +
       crtl->outgoing_args_size) >= 504)
    {
      /* This is the same as the code in thumb1_expand_prologue() which
	 determines which register to use for stack decrement.  */
      for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
	if (mask & (1 << reg))
	  break;

      if (reg > LAST_LO_REGNUM)
	{
	  /* Make sure we have a register available for stack decrement.  */
	  mask |= 1 << LAST_LO_REGNUM;
	}
    }

  return mask;
}
/* Return the number of bytes required to save VFP registers.  */
static int
arm_get_vfp_saved_size (void)
{
  unsigned int regno;
  int count;
  int saved;

  saved = 0;
  /* Space for saved VFP registers.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    {
      count = 0;
      for (regno = FIRST_VFP_REGNUM;
	   regno < LAST_VFP_REGNUM;
	   regno += 2)
	{
	  if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
	      && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
	    {
	      if (count > 0)
		{
		  /* Workaround ARM10 VFPr1 bug.  */
		  if (count == 2 && !arm_arch6)
		    count++;
		  saved += count * 8;
		}
	      count = 0;
	    }
	  else
	    count++;
	}
      if (count > 0)
	{
	  if (count == 2 && !arm_arch6)
	    count++;
	  saved += count * 8;
	}
    }
  return saved;
}
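
/* For example, a function that clobbers d8-d15 and no other VFP registers
   contributes one contiguous block of 8 registers here, i.e. 64 bytes of
   save area; a block of exactly two registers gets one extra register on
   pre-ARMv6 cores because of the VFPr1 workaround above.  */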
/* Generate a function exit sequence.  If REALLY_RETURN is false, then do
   everything bar the final return instruction.  */
const char *
output_return_instruction (rtx operand, int really_return, int reverse)
{
  char conditional[10];
  char instr[100];
  unsigned reg;
  unsigned long live_regs_mask;
  unsigned long func_type;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    return "";

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
    {
      /* If this function was declared non-returning, and we have
	 found a tail call, then we have to trust that the called
	 function won't return.  */
      if (really_return)
	{
	  rtx ops[2];

	  /* Otherwise, trap an attempted return by aborting.  */
	  ops[0] = operand;
	  ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
				       : "abort");
	  assemble_external_libcall (ops[1]);
	  output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
	}

      return "";
    }

  gcc_assert (!cfun->calls_alloca || really_return);

  sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');

  cfun->machine->return_used_this_function = 1;

  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  if (live_regs_mask)
    {
      const char * return_reg;

      /* If we do not have any special requirements for function exit
	 (e.g. interworking) then we can load the return address
	 directly into the PC.  Otherwise we must load it into LR.  */
      if (really_return
	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
	return_reg = reg_names[PC_REGNUM];
      else
	return_reg = reg_names[LR_REGNUM];

      if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
	{
	  /* There are three possible reasons for the IP register
	     being saved.  1) a stack frame was created, in which case
	     IP contains the old stack pointer, or 2) an ISR routine
	     corrupted it, or 3) it was saved to align the stack on
	     iWMMXt.  In case 1, restore IP into SP, otherwise just
	     restore IP.  */
	  if (frame_pointer_needed)
	    {
	      live_regs_mask &= ~ (1 << IP_REGNUM);
	      live_regs_mask |=   (1 << SP_REGNUM);
	    }
	  else
	    gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
	}

      /* On some ARM architectures it is faster to use LDR rather than
	 LDM to load a single register.  On other architectures, the
	 cost is the same.  In 26 bit mode, or for exception handlers,
	 we have to use LDM to load the PC so that the CPSR is also
	 restored.  */
      for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
	if (live_regs_mask == (1U << reg))
	  break;

      if (reg <= LAST_ARM_REGNUM
	  && (reg != LR_REGNUM
	      || ! really_return
	      || ! IS_INTERRUPT (func_type)))
	{
	  sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
		   (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
	}
      else
	{
	  char *p;
	  int first = 1;

	  /* Generate the load multiple instruction to restore the
	     registers.  Note we can get here, even if
	     frame_pointer_needed is true, but only if sp already
	     points to the base of the saved core registers.  */
	  if (live_regs_mask & (1 << SP_REGNUM))
	    {
	      unsigned HOST_WIDE_INT stack_adjust;

	      stack_adjust = offsets->outgoing_args - offsets->saved_regs;
	      gcc_assert (stack_adjust == 0 || stack_adjust == 4);

	      if (stack_adjust && arm_arch5 && TARGET_ARM)
		{
		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
		}
	      else
		{
		  /* If we can't use ldmib (SA110 bug),
		     then try to pop r3 instead.  */
		  if (stack_adjust)
		    live_regs_mask |= 1 << 3;

		  if (TARGET_UNIFIED_ASM)
		    sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
		  else
		    sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
		}
	    }
	  else
	    {
	      if (TARGET_UNIFIED_ASM)
		sprintf (instr, "pop%s\t{", conditional);
	      else
		sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
	    }

	  p = instr + strlen (instr);

	  for (reg = 0; reg <= SP_REGNUM; reg++)
	    if (live_regs_mask & (1 << reg))
	      {
		int l = strlen (reg_names[reg]);

		if (first)
		  first = 0;
		else
		  {
		    memcpy (p, ", ", 2);
		    p += 2;
		  }

		memcpy (p, "%|", 2);
		memcpy (p + 2, reg_names[reg], l);
		p += l + 2;
	      }

	  if (live_regs_mask & (1 << LR_REGNUM))
	    {
	      sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
	      /* If returning from an interrupt, restore the CPSR.  */
	      if (IS_INTERRUPT (func_type))
		strcat (p, "^");
	    }
	  else
	    strcpy (p, "}");
	}

      output_asm_insn (instr, & operand);

      /* See if we need to generate an extra instruction to
	 perform the actual function return.  */
      if (really_return
	  && func_type != ARM_FT_INTERWORKED
	  && (live_regs_mask & (1 << LR_REGNUM)) != 0)
	{
	  /* The return has already been handled
	     by loading the LR into the PC.  */
	  really_return = 0;
	}
    }

  if (really_return)
    {
      switch ((int) ARM_FUNC_TYPE (func_type))
	{
	case ARM_FT_ISR:
	case ARM_FT_FIQ:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
	  break;

	case ARM_FT_INTERWORKED:
	  sprintf (instr, "bx%s\t%%|lr", conditional);
	  break;

	case ARM_FT_EXCEPTION:
	  /* ??? This is wrong for unified assembly syntax.  */
	  sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
	  break;

	default:
	  /* Use bx if it's available.  */
	  if (arm_arch5 || arm_arch4t)
	    sprintf (instr, "bx%s\t%%|lr", conditional);
	  else
	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
	  break;
	}

      output_asm_insn (instr, & operand);
    }

  return "";
}
/* Write the function name into the code section, directly preceding
   the function prologue.

   Code will be output similar to this:
     t0
	 .ascii "arm_poke_function_name", 0
	 .align
     t1
	 .word 0xff000000 + (t1 - t0)
     arm_poke_function_name
	 mov     ip, sp
	 stmfd   sp!, {fp, ip, lr, pc}
	 sub     fp, ip, #4

   When performing a stack backtrace, code can inspect the value
   of 'pc' stored at 'fp' + 0.  If the trace function then looks
   at location pc - 12 and the top 8 bits are set, then we know
   that there is a function name embedded immediately preceding this
   location and has length ((pc[-3]) & 0xff000000).

   We assume that pc is declared as a pointer to an unsigned long.

   It is of no benefit to output the function name if we are assembling
   a leaf function.  These function types will not contain a stack
   backtrace structure, therefore it is not possible to determine the
   function name.  */
void
arm_poke_function_name (FILE *stream, const char *name)
{
  unsigned long alignlength;
  unsigned long length;
  rtx           x;

  length      = strlen (name) + 1;
  alignlength = ROUND_UP_WORD (length);

  ASM_OUTPUT_ASCII (stream, name, length);
  ASM_OUTPUT_ALIGN (stream, 2);
  x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
  assemble_aligned_integer (UNITS_PER_WORD, x);
}
/* Place some comments into the assembler stream
   describing the current function.  */
static void
arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
{
  unsigned long func_type;

  if (TARGET_THUMB1)
    {
      thumb1_output_function_prologue (f, frame_size);
      return;
    }

  /* Sanity check.  */
  gcc_assert (!arm_ccfsm_state && !arm_target_insn);

  func_type = arm_current_func_type ();

  switch ((int) ARM_FUNC_TYPE (func_type))
    {
    default:
    case ARM_FT_NORMAL:
      break;
    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\t%@ Function supports interworking.\n");
      break;
    case ARM_FT_ISR:
      asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
      break;
    case ARM_FT_FIQ:
      asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
      break;
    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
      break;
    }

  if (IS_NAKED (func_type))
    asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");

  if (IS_VOLATILE (func_type))
    asm_fprintf (f, "\t%@ Volatile: function does not return.\n");

  if (IS_NESTED (func_type))
    asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");

  if (IS_STACKALIGN (func_type))
    asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");

  asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
	       crtl->args.size,
	       crtl->args.pretend_args_size, frame_size);

  asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
	       frame_pointer_needed,
	       cfun->machine->uses_anonymous_args);

  if (cfun->machine->lr_save_eliminated)
    asm_fprintf (f, "\t%@ link register save eliminated.\n");

  if (crtl->calls_eh_return)
    asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
}
arm_output_epilogue (rtx sibling)
  unsigned long saved_regs_mask;
  unsigned long func_type;
  /* Floats_offset is the offset from the "virtual" frame.  In an APCS
     frame that is $fp + 4 for a non-variadic function.  */
  int floats_offset = 0;
  FILE * f = asm_out_file;
  unsigned int lrm_count = 0;
  int really_return = (sibling == NULL);
  arm_stack_offsets *offsets;

  /* If we have already generated the return instruction
     then it is futile to generate anything else.  */
  if (use_return_insn (FALSE, sibling) &&
      (cfun->machine->return_used_this_function != 0))

  func_type = arm_current_func_type ();

  if (IS_NAKED (func_type))
    /* Naked functions don't have epilogues.  */

  if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
      /* A volatile function should never return.  Call abort.  */
      op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
      assemble_external_libcall (op);
      output_asm_insn ("bl\t%a0", &op);

  /* If we are throwing an exception, then we really must be doing a
     return, so we can't tail-call.  */
  gcc_assert (!crtl->calls_eh_return || really_return);

  offsets = arm_get_frame_offsets ();
  saved_regs_mask = offsets->saved_regs_mask;

  lrm_count = bit_count (saved_regs_mask);

  floats_offset = offsets->saved_args;
  /* Compute how far away the floats will be.  */
  for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
    if (saved_regs_mask & (1 << reg))
      floats_offset += 4;

  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
      /* This variable is for the Virtual Frame Pointer, not VFP regs.  */
      int vfp_offset = offsets->frame;

      if (TARGET_FPA_EMU2)
          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
                floats_offset += 12;
                asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
                             reg, FP_REGNUM, floats_offset - vfp_offset);

          start_reg = LAST_FPA_REGNUM;

          for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
              if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
                  floats_offset += 12;

                  /* We can't unstack more than four registers at once.  */
                  if (start_reg - reg == 3)
                      asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
                                   reg, FP_REGNUM, floats_offset - vfp_offset);
                      start_reg = reg - 1;

              if (reg != start_reg)
                asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
                             reg + 1, start_reg - reg,
                             FP_REGNUM, floats_offset - vfp_offset);
              start_reg = reg - 1;

          /* Just in case the last register checked also needs unstacking.  */
          if (reg != start_reg)
            asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
                         reg + 1, start_reg - reg,
                         FP_REGNUM, floats_offset - vfp_offset);
      if (TARGET_HARD_FLOAT && TARGET_VFP)
          /* The fldmd insns do not have base+offset addressing
             modes, so we use IP to hold the address.  */
          saved_size = arm_get_vfp_saved_size ();

          if (saved_size > 0)
              floats_offset += saved_size;
              asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
                           FP_REGNUM, floats_offset - vfp_offset);
          start_reg = FIRST_VFP_REGNUM;
          for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
              if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
                  && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
                  if (start_reg != reg)
                    vfp_output_fldmd (f, IP_REGNUM,
                                      (start_reg - FIRST_VFP_REGNUM) / 2,
                                      (reg - start_reg) / 2);
                  start_reg = reg + 2;
          if (start_reg != reg)
            vfp_output_fldmd (f, IP_REGNUM,
                              (start_reg - FIRST_VFP_REGNUM) / 2,
                              (reg - start_reg) / 2);

          /* The frame pointer is guaranteed to be non-double-word aligned.
             This is because it is set to (old_stack_pointer - 4) and the
             old_stack_pointer was double word aligned.  Thus the offset to
             the iWMMXt registers to be loaded must also be non-double-word
             sized, so that the resultant address *is* double-word aligned.
             We can ignore floats_offset since that was already included in
             the live_regs_mask.  */
          lrm_count += (lrm_count % 2 ? 2 : 1);

          for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
                asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
                             reg, FP_REGNUM, lrm_count * 4);
      /* saved_regs_mask should contain the IP, which at the time of stack
         frame generation actually contains the old stack pointer.  So a
         quick way to unwind the stack is just pop the IP register directly
         into the stack pointer.  */
      gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
      saved_regs_mask &= ~ (1 << IP_REGNUM);
      saved_regs_mask |=   (1 << SP_REGNUM);

      /* There are two registers left in saved_regs_mask - LR and PC.  We
         only need to restore the LR register (the return address), but to
         save time we can load it directly into the PC, unless we need a
         special function exit sequence, or we are not really returning.  */
          && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
          && !crtl->calls_eh_return)
        /* Delete the LR from the register mask, so that the LR on
           the stack is loaded into the PC in the register mask.  */
        saved_regs_mask &= ~ (1 << LR_REGNUM);
        saved_regs_mask &= ~ (1 << PC_REGNUM);

      /* We must use SP as the base register, because SP is one of the
         registers being restored.  If an interrupt or page fault
         happens in the ldm instruction, the SP might or might not
         have been restored.  That would be bad, as then SP will no
         longer indicate the safe area of stack, and we can get stack
         corruption.  Using SP as the base register means that it will
         be reset correctly to the original value, should an interrupt
         occur.  If the stack pointer already points at the right
         place, then omit the subtraction.  */
      if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
          || cfun->calls_alloca)
        asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
                     4 * bit_count (saved_regs_mask));
      print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);

      if (IS_INTERRUPT (func_type))
        /* Interrupt handlers will have pushed the
           IP onto the stack, so restore it now.  */
        print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
      /* This branch is executed for ARM mode (non-apcs frames) and
         Thumb-2 mode.  Frame layout is essentially the same for those
         cases, except that in ARM mode frame pointer points to the
         first saved register, while in Thumb-2 mode the frame pointer points
         to the last saved register.

         It is possible to make frame pointer point to last saved
         register in both cases, and remove some conditionals below.
         That means that fp setup in prologue would be just "mov fp, sp"
         and sp restore in epilogue would be just "mov sp, fp", whereas
         now we have to use add/sub in those cases.  However, the value
         of that would be marginal, as both mov and add/sub are 32-bit
         in ARM mode, and it would require extra conditionals
         in arm_expand_prologue to distinguish the ARM-apcs-frame case
         (where frame pointer is required to point at first register)
         and ARM-non-apcs-frame.  Therefore, such change is postponed
         until a real need arises.  */
      unsigned HOST_WIDE_INT amount;

      /* Restore stack pointer if necessary.  */
      if (TARGET_ARM && frame_pointer_needed)
          operands[0] = stack_pointer_rtx;
          operands[1] = hard_frame_pointer_rtx;
          operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
          output_add_immediate (operands);

          if (frame_pointer_needed)
              /* For Thumb-2 restore sp from the frame pointer.
                 Operand restrictions mean we have to increment FP, then copy
                 to SP.  */
              amount = offsets->locals_base - offsets->saved_regs;
              operands[0] = hard_frame_pointer_rtx;

              unsigned long count;
              operands[0] = stack_pointer_rtx;
              amount = offsets->outgoing_args - offsets->saved_regs;
              /* pop call clobbered registers if it avoids a
                 separate stack adjustment.  */
              count = offsets->saved_regs - offsets->saved_args;
                  && !crtl->calls_eh_return
                  && bit_count(saved_regs_mask) * 4 == count
                  && !IS_INTERRUPT (func_type)
                  && !crtl->tail_call_emit)
                  unsigned long mask;
                  mask = (1 << (arm_size_return_regs() / 4)) - 1;
                  mask &= ~saved_regs_mask;
                  while (bit_count (mask) * 4 > amount)
                      while ((mask & (1 << reg)) == 0)
                      mask &= ~(1 << reg);
                  if (bit_count (mask) * 4 == amount) {
                      saved_regs_mask |= mask;

              operands[1] = operands[0];
              operands[2] = GEN_INT (amount);
              output_add_immediate (operands);

          if (frame_pointer_needed)
            asm_fprintf (f, "\tmov\t%r, %r\n",
                         SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
      if (TARGET_FPA_EMU2)
          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
            if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
              asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",

          start_reg = FIRST_FPA_REGNUM;

          for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
              if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
                  if (reg - start_reg == 3)
                      asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
                                   start_reg, SP_REGNUM);
                      start_reg = reg + 1;
                  if (reg != start_reg)
                    asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
                                 start_reg, reg - start_reg,
                  start_reg = reg + 1;

          /* Just in case the last register checked also needs unstacking.  */
          if (reg != start_reg)
            asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
                         start_reg, reg - start_reg, SP_REGNUM);
      if (TARGET_HARD_FLOAT && TARGET_VFP)
          int end_reg = LAST_VFP_REGNUM + 1;

          /* Scan the registers in reverse order.  We need to match
             any groupings made in the prologue and generate matching
             fldmd operations.  */
          for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
              if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
                  && (!df_regs_ever_live_p (reg + 1)
                      || call_used_regs[reg + 1]))
                  if (end_reg > reg + 2)
                    vfp_output_fldmd (f, SP_REGNUM,
                                      (reg + 2 - FIRST_VFP_REGNUM) / 2,
                                      (end_reg - (reg + 2)) / 2);
          if (end_reg > reg + 2)
            vfp_output_fldmd (f, SP_REGNUM, 0,
                              (end_reg - (reg + 2)) / 2);

        for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
          if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
            asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
      /* If we can, restore the LR into the PC.  */
      if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
          && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
          && !IS_STACKALIGN (func_type)
          && crtl->args.pretend_args_size == 0
          && saved_regs_mask & (1 << LR_REGNUM)
          && !crtl->calls_eh_return)
          saved_regs_mask &= ~ (1 << LR_REGNUM);
          saved_regs_mask |=   (1 << PC_REGNUM);
          rfe = IS_INTERRUPT (func_type);

      /* Load the registers off the stack.  If we only have one register
         to load use the LDR instruction - it is faster.  For Thumb-2
         always use pop and the assembler will pick the best instruction.  */
      if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
          && !IS_INTERRUPT(func_type))
        asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
      else if (saved_regs_mask)
          if (saved_regs_mask & (1 << SP_REGNUM))
            /* Note - write back to the stack register is not enabled
               (i.e. "ldmfd sp!...").  We know that the stack pointer is
               in the list of registers and if we add writeback the
               instruction becomes UNPREDICTABLE.  */
            print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
          else if (TARGET_ARM)
            print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
            print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
      if (crtl->args.pretend_args_size)
          /* Unwind the pre-pushed regs.  */
          operands[0] = operands[1] = stack_pointer_rtx;
          operands[2] = GEN_INT (crtl->args.pretend_args_size);
          output_add_immediate (operands);

  /* We may have already restored PC directly from the stack.  */
  if (!really_return || saved_regs_mask & (1 << PC_REGNUM))

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
                 ARM_EH_STACKADJ_REGNUM);

  /* Generate the return instruction.  */
  switch ((int) ARM_FUNC_TYPE (func_type))
      asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);

    case ARM_FT_EXCEPTION:
      asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);

    case ARM_FT_INTERWORKED:
      asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);

      if (IS_STACKALIGN (func_type))
          /* See comment in arm_expand_prologue.  */
          asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
      if (arm_arch5 || arm_arch4t)
        asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
        asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
                              HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
  arm_stack_offsets *offsets;

      /* Emit any call-via-reg trampolines that are needed for v4t support
         of call_reg and call_value_reg type insns.  */
      for (regno = 0; regno < LR_REGNUM; regno++)
          rtx label = cfun->machine->call_via[regno];

              switch_to_section (function_section (current_function_decl));
              targetm.asm_out.internal_label (asm_out_file, "L",
                                              CODE_LABEL_NUMBER (label));
              asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);

      /* ??? Probably not safe to set this here, since it assumes that a
         function will be emitted as assembly immediately after we generate
         RTL for it.  This does not happen for inline functions.  */
      cfun->machine->return_used_this_function = 0;
  else /* TARGET_32BIT */
      /* We need to take into account any stack-frame rounding.  */
      offsets = arm_get_frame_offsets ();

      gcc_assert (!use_return_insn (FALSE, NULL)
                  || (cfun->machine->return_used_this_function != 0)
                  || offsets->saved_regs == offsets->outgoing_args
                  || frame_pointer_needed);

      /* Reset the ARM-specific per-function variables.  */
      after_arm_reorg = 0;
/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */
emit_multi_reg_push (unsigned long mask)
  int num_dwarf_regs;
  int dwarf_par_index;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
    if (mask & (1 << i))

  gcc_assert (num_regs && num_regs <= 16);

  /* We don't record the PC in the dwarf frame information.  */
  num_dwarf_regs = num_regs;
  if (mask & (1 << PC_REGNUM))

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the arm.md file.

     The body of the insn looks something like this:

       (set (mem:BLK (pre_modify:SI (reg:SI sp)
                                    (const_int:SI <num>)))
            (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

       (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
       (set (mem:SI (reg:SI sp)) (reg:SI r4))
       (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
       (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))

     FIXME:: In an ideal world the PRE_MODIFY would not exist and
             instead we'd have a parallel expression detailing all
             the stores to the various memory addresses so that debug
             information is more up-to-date.  Remember however while writing
             this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i <= LAST_ARM_REGNUM; i++)
      if (mask & (1 << i))
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, 0)
            = gen_rtx_SET (VOIDmode,
                           gen_rtx_PRE_MODIFY (Pmode,
                           (stack_pointer_rtx,
                           gen_rtx_UNSPEC (BLKmode,
                                           gen_rtvec (1, reg),
                                           UNSPEC_PUSH_MULT));

          if (i != PC_REGNUM)
              tmp = gen_rtx_SET (VOIDmode,
                                 gen_frame_mem (SImode, stack_pointer_rtx),
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index) = tmp;

  for (j = 1, i++; j < num_regs; i++)
      if (mask & (1 << i))
          reg = gen_rtx_REG (SImode, i);

          XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);

          if (i != PC_REGNUM)
                = gen_rtx_SET (VOIDmode,
                               plus_constant (stack_pointer_rtx,
              RTX_FRAME_RELATED_P (tmp) = 1;
              XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;

  par = emit_insn (par);

  tmp = gen_rtx_SET (VOIDmode,
                     plus_constant (stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
/* Calculate the size of the return value that is passed in registers.  */
arm_size_return_regs (void)
  enum machine_mode mode;

  if (crtl->return_rtx != 0)
    mode = GET_MODE (crtl->return_rtx);
    mode = DECL_MODE (DECL_RESULT (current_function_decl));

  return GET_MODE_SIZE (mode);
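/* Worked example (illustrative, not from the sources): a function whose
   return value lives in crtl->return_rtx in DImode yields
   GET_MODE_SIZE (DImode) == 8.  That still satisfies the
   "arm_size_return_regs () <= 12" test used elsewhere when deciding whether
   r3 may be reused for stack alignment, whereas a return value occupying
   all of r0-r3 (16 bytes) would not.  */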
emit_sfm (int base_reg, int count)
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));

  reg = gen_rtx_REG (XFmode, base_reg++);

  XVECEXP (par, 0, 0)
    = gen_rtx_SET (VOIDmode,
                   gen_rtx_PRE_MODIFY (Pmode,
                   (stack_pointer_rtx,
                   gen_rtx_UNSPEC (BLKmode,
                                   gen_rtvec (1, reg),
                                   UNSPEC_PUSH_MULT));
  tmp = gen_rtx_SET (VOIDmode,
                     gen_frame_mem (XFmode, stack_pointer_rtx), reg);
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 1) = tmp;

  for (i = 1; i < count; i++)
      reg = gen_rtx_REG (XFmode, base_reg++);
      XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);

      tmp = gen_rtx_SET (VOIDmode,
                         gen_frame_mem (XFmode,
                                        plus_constant (stack_pointer_rtx,
      RTX_FRAME_RELATED_P (tmp) = 1;
      XVECEXP (dwarf, 0, i + 1) = tmp;

  tmp = gen_rtx_SET (VOIDmode,
                     plus_constant (stack_pointer_rtx, -12 * count));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  par = emit_insn (par);
  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
/* Return true if the current function needs to save/restore LR.  */
thumb_force_lr_save (void)
  return !cfun->machine->lr_save_eliminated
         && (!leaf_function_p ()
             || thumb_far_jump_used_p ()
             || df_regs_ever_live_p (LR_REGNUM));
/* Compute the distance from register FROM to register TO.
   These can be the arg pointer (26), the soft frame pointer (25),
   the stack pointer (13) or the hard frame pointer (11).
   In thumb mode r7 is used as the soft frame pointer, if needed.
   Typical stack layout looks like this:

       old stack pointer -> |    |
                             ----
                            |    | \
                            |    |   saved arguments for
                            |    |   vararg functions
                            |    | /
                              --
   hard FP & arg pointer -> |    | \
                            |    |   stack
                            |    |   frame
                            |    | /
                              --
                            |    | \
                            |    |   call saved
                            |    |   registers
      soft frame pointer -> |    | /
                              --
                            |    | \
                            |    |   local
                            |    |   variables
     locals base pointer -> |    | /
                              --
                            |    | \
                            |    |   outgoing
                            |    |   arguments
   current stack pointer -> |    | /
                              --

  For a given function some or all of these stack components
  may not be needed, giving rise to the possibility of
  eliminating some of the registers.

  The values returned by this function must reflect the behavior
  of arm_expand_prologue() and arm_compute_save_reg_mask().

  The sign of the number returned reflects the direction of stack
  growth, so the values are positive for all eliminations except
  from the soft frame pointer to the hard frame pointer.

  SFP may point just inside the local variables block to ensure correct
  alignment.  */
/* Calculate stack offsets.  These are used to calculate register elimination
   offsets and in prologue/epilogue code.  Also calculates which registers
   should be saved.  */
static arm_stack_offsets *
arm_get_frame_offsets (void)
  struct arm_stack_offsets *offsets;
  unsigned long func_type;
  HOST_WIDE_INT frame_size;

  offsets = &cfun->machine->stack_offsets;

  /* We need to know if we are a leaf function.  Unfortunately, it
     is possible to be called after start_sequence has been called,
     which causes get_insns to return the insns for the sequence,
     not the function, which will cause leaf_function_p to return
     the incorrect result.
     to know about leaf functions once reload has completed, and the
     frame size cannot be changed after that time, so we can safely
     use the cached value.  */
  if (reload_completed)

  /* Initially this is the size of the local variables.  It will be translated
     into an offset once we have determined the size of preceding data.  */
  frame_size = ROUND_UP_WORD (get_frame_size ());

  leaf = leaf_function_p ();

  /* Space for variadic functions.  */
  offsets->saved_args = crtl->args.pretend_args_size;

  /* In Thumb mode this is incorrect, but never used.  */
  offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0) +
                   arm_compute_static_chain_stack_bytes();

      unsigned int regno;

      offsets->saved_regs_mask = arm_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;

      /* We know that SP will be doubleword aligned on entry, and we must
         preserve that condition at any subroutine call.  We also require the
         soft frame pointer to be doubleword aligned.  */

      if (TARGET_REALLY_IWMMXT)
          /* Check for the call-saved iWMMXt registers.  */
          for (regno = FIRST_IWMMXT_REGNUM;
               regno <= LAST_IWMMXT_REGNUM;
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])

      func_type = arm_current_func_type ();
      if (! IS_VOLATILE (func_type))
          /* Space for saved FPA registers.  */
          for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
            if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])

          /* Space for saved VFP registers.  */
          if (TARGET_HARD_FLOAT && TARGET_VFP)
            saved += arm_get_vfp_saved_size ();
  else /* TARGET_THUMB1 */
      offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
      core_saved = bit_count (offsets->saved_regs_mask) * 4;
      saved = core_saved;
      if (TARGET_BACKTRACE)
  /* Saved registers include the stack frame.  */
  offsets->saved_regs = offsets->saved_args + saved +
                        arm_compute_static_chain_stack_bytes();
  offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;

  /* A leaf function does not need any stack alignment if it has nothing
     on the stack.  */
  if (leaf && frame_size == 0)
      offsets->outgoing_args = offsets->soft_frame;
      offsets->locals_base = offsets->soft_frame;

  /* Ensure SFP has the correct alignment.  */
  if (ARM_DOUBLEWORD_ALIGN
      && (offsets->soft_frame & 7))
      offsets->soft_frame += 4;
      /* Try to align stack by pushing an extra reg.  Don't bother doing this
         when there is a stack frame as the alignment will be rolled into
         the normal stack adjustment.  */
      if (frame_size + crtl->outgoing_args_size == 0)
          /* If it is safe to use r3, then do so.  This sometimes
             generates better code on Thumb-2 by avoiding the need to
             use 32-bit push/pop instructions.  */
          if (!crtl->tail_call_emit
              && arm_size_return_regs () <= 12)
            for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
                if ((offsets->saved_regs_mask & (1 << i)) == 0)
              offsets->saved_regs += 4;
              offsets->saved_regs_mask |= (1 << reg);

  offsets->locals_base = offsets->soft_frame + frame_size;
  offsets->outgoing_args = (offsets->locals_base
                            + crtl->outgoing_args_size);

  if (ARM_DOUBLEWORD_ALIGN)
      /* Ensure SP remains doubleword aligned.  */
      if (offsets->outgoing_args & 7)
        offsets->outgoing_args += 4;
      gcc_assert (!(offsets->outgoing_args & 7));
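/* Illustrative numbers only (not from the sources): for a non-leaf ARM
   function with no pretend args, {r4, r5, lr} saved, 8 bytes of locals and
   no outgoing arguments, the offsets computed above come out roughly as:

       saved_args    = 0
       saved_regs    = 0 + 3*4                         = 12
       soft_frame    = 12 (+4 padding if ARM_DOUBLEWORD_ALIGN, -> 16)
       locals_base   = soft_frame + 8
       outgoing_args = locals_base, rounded up to an 8-byte boundary.  */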
/* Calculate the relative offsets for the different stack pointers.  Positive
   offsets are in the direction of stack growth.  */
arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  /* OK, now we have enough information to compute the distances.
     There must be an entry in these switch tables for each pair
     of registers in ELIMINABLE_REGS, even if some of the entries
     seem to be redundant or useless.  */
    case ARG_POINTER_REGNUM:
        case THUMB_HARD_FRAME_POINTER_REGNUM:

        case FRAME_POINTER_REGNUM:
          /* This is the reverse of the soft frame pointer
             to hard frame pointer elimination below.  */
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* This is only non-zero in the case where the static chain register
             is stored above the frame.  */
          return offsets->frame - offsets->saved_args - 4;

        case STACK_POINTER_REGNUM:
          /* If nothing has been pushed on the stack at all
             then this will return -4.  This *is* correct!  */
          return offsets->outgoing_args - (offsets->saved_args + 4);

          gcc_unreachable ();
      gcc_unreachable ();

    case FRAME_POINTER_REGNUM:
        case THUMB_HARD_FRAME_POINTER_REGNUM:

        case ARM_HARD_FRAME_POINTER_REGNUM:
          /* The hard frame pointer points to the top entry in the
             stack frame.  The soft frame pointer to the bottom entry
             in the stack frame.  If there is no stack frame at all,
             then they are identical.  */
          return offsets->frame - offsets->soft_frame;

        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

          gcc_unreachable ();
      gcc_unreachable ();

      /* You cannot eliminate from the stack pointer.
         In theory you could eliminate from the hard frame
         pointer to the stack pointer, but this will never
         happen, since if a stack frame is not needed the
         hard frame pointer will never be used.  */
      gcc_unreachable ();
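/* Example (illustrative): with the layout sketched after
   arm_get_frame_offsets above, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM gives outgoing_args - (saved_args + 4) = 24 - 4 = 20,
   i.e. the argument pointer sits 20 bytes above the final stack pointer;
   with nothing pushed at all the same formula yields -4, which the comment
   above notes is intentional.  */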
/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All eliminations are permissible.  Note that ARG_POINTER_REGNUM and
   HARD_FRAME_POINTER_REGNUM are in fact the same thing.  If we need a frame
   pointer, we must eliminate FRAME_POINTER_REGNUM into
   HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
   ARG_POINTER_REGNUM.  */
arm_can_eliminate (const int from, const int to)
  return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
          (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
          (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
          (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
           true);
/* Emit RTL to save coprocessor registers on function entry.  Returns the
   number of bytes pushed.  */
arm_save_coproc_regs(void)
  int saved_size = 0;
  unsigned start_reg;

  for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
    if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
        insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
        insn = gen_rtx_MEM (V2SImode, insn);
        insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
        RTX_FRAME_RELATED_P (insn) = 1;

  /* Save any floating point call-saved registers used by this
     function.  */
  if (TARGET_FPA_EMU2)
      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
        if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
            insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
            insn = gen_rtx_MEM (XFmode, insn);
            insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
            RTX_FRAME_RELATED_P (insn) = 1;

      start_reg = LAST_FPA_REGNUM;

      for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
          if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
              if (start_reg - reg == 3)
                  insn = emit_sfm (reg, 4);
                  RTX_FRAME_RELATED_P (insn) = 1;
                  start_reg = reg - 1;
              if (start_reg != reg)
                  insn = emit_sfm (reg + 1, start_reg - reg);
                  RTX_FRAME_RELATED_P (insn) = 1;
                  saved_size += (start_reg - reg) * 12;
              start_reg = reg - 1;

      if (start_reg != reg)
          insn = emit_sfm (reg + 1, start_reg - reg);
          saved_size += (start_reg - reg) * 12;
          RTX_FRAME_RELATED_P (insn) = 1;

  if (TARGET_HARD_FLOAT && TARGET_VFP)
      start_reg = FIRST_VFP_REGNUM;

      for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
          if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
              && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
              if (start_reg != reg)
                saved_size += vfp_emit_fstmd (start_reg,
                                              (reg - start_reg) / 2);
              start_reg = reg + 2;
      if (start_reg != reg)
        saved_size += vfp_emit_fstmd (start_reg,
                                      (reg - start_reg) / 2);
/* Set the Thumb frame pointer from the stack pointer.  */
thumb_set_frame_pointer (arm_stack_offsets *offsets)
  HOST_WIDE_INT amount;

  amount = offsets->outgoing_args - offsets->locals_base;
    insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                  stack_pointer_rtx, GEN_INT (amount)));
      emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
      /* Thumb-2 RTL patterns expect sp as the first input.  Thumb-1
         expects the first two operands to be the same.  */
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        hard_frame_pointer_rtx,
                                        stack_pointer_rtx));
      dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
                           plus_constant (stack_pointer_rtx, amount));
      RTX_FRAME_RELATED_P (dwarf) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

  RTX_FRAME_RELATED_P (insn) = 1;
/* Generate the prologue instructions for entry into an ARM or Thumb-2
   function.  */
arm_expand_prologue (void)
  unsigned long live_regs_mask;
  unsigned long func_type;
  int saved_pretend_args = 0;
  int saved_regs = 0;
  unsigned HOST_WIDE_INT args_to_push;
  arm_stack_offsets *offsets;

  func_type = arm_current_func_type ();

  /* Naked functions don't have prologues.  */
  if (IS_NAKED (func_type))

  /* Make a copy of c_f_p_a_s as we may need to modify it locally.  */
  args_to_push = crtl->args.pretend_args_size;

  /* Compute which register we will have to save onto the stack.  */
  offsets = arm_get_frame_offsets ();
  live_regs_mask = offsets->saved_regs_mask;

  ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);

  if (IS_STACKALIGN (func_type))
      /* Handle a word-aligned stack pointer.  We generate the following:

           <save and restore r0 in normal prologue/epilogue>

         The unwinder doesn't need to know about the stack realignment.
         Just tell it we saved SP in r0.  */
      gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);

      r0 = gen_rtx_REG (SImode, 0);
      r1 = gen_rtx_REG (SImode, 1);
      /* Use a real rtvec rather than NULL_RTVEC so the rest of the
         compiler won't choke.  */
      dwarf = gen_rtx_UNSPEC (SImode, rtvec_alloc (0), UNSPEC_STACK_ALIGN);
      dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
      insn = gen_movsi (r0, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
      emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
      emit_insn (gen_movsi (stack_pointer_rtx, r1));
  /* For APCS frames, if IP register is clobbered
     when creating frame, save that register in a special
     way.  */
  if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
      if (IS_INTERRUPT (func_type))
          /* Interrupt functions must not corrupt any registers.
             Creating a frame pointer however, corrupts the IP
             register, so we must push it first.  */
          insn = emit_multi_reg_push (1 << IP_REGNUM);

          /* Do not set RTX_FRAME_RELATED_P on this insn.
             The dwarf stack unwinding code only wants to see one
             stack decrement per function, and this is not it.  If
             this instruction is labeled as being part of the frame
             creation sequence then dwarf2out_frame_debug_expr will
             die when it encounters the assignment of IP to FP
             later on, since the use of SP here establishes SP as
             the CFA register and not IP.

             Anyway this instruction is not really part of the stack
             frame creation although it is part of the prologue.  */
      else if (IS_NESTED (func_type))
          /* The Static chain register is the same as the IP register
             used as a scratch register during stack frame creation.
             To get around this need to find somewhere to store IP
             whilst the frame is being created.  We try the following

               1. The last argument register.
               2. A slot on the stack above the frame.  (This only
                  works if the function is not a varargs function).
               3. Register r3, after pushing the argument registers

             Note - we only need to tell the dwarf2 backend about the SP
             adjustment in the second variant; the static chain register
             doesn't need to be unwound, as it doesn't contain a value
             inherited from the caller.  */
          if (df_regs_ever_live_p (3) == false)
            insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
          else if (args_to_push == 0)
              gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
              insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
              insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);

              /* Just tell the dwarf backend that we adjusted SP.  */
              dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                                   plus_constant (stack_pointer_rtx,
              RTX_FRAME_RELATED_P (insn) = 1;
              add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

              /* Store the args on the stack.  */
              if (cfun->machine->uses_anonymous_args)
                insn = emit_multi_reg_push
                  ((0xf0 >> (args_to_push / 4)) & 0xf);
                  (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (- args_to_push)));
              RTX_FRAME_RELATED_P (insn) = 1;

              saved_pretend_args = 1;
              fp_offset = args_to_push;

              /* Now reuse r3 to preserve IP.  */
              emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);

      insn = emit_set_insn (ip_rtx,
                            plus_constant (stack_pointer_rtx, fp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Push the argument registers, or reserve space for them.  */
      if (cfun->machine->uses_anonymous_args)
        insn = emit_multi_reg_push
          ((0xf0 >> (args_to_push / 4)) & 0xf);
          (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                       GEN_INT (- args_to_push)));
      RTX_FRAME_RELATED_P (insn) = 1;
  /* If this is an interrupt service routine, and the link register
     is going to be pushed, and we're not generating extra
     push of IP (needed when frame is needed and frame layout if apcs),
     subtracting four from LR now will mean that the function return
     can be done with a single instruction.  */
  if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
      && (live_regs_mask & (1 << LR_REGNUM)) != 0
      && !(frame_pointer_needed && TARGET_APCS_FRAME)
      rtx lr = gen_rtx_REG (SImode, LR_REGNUM);

      emit_set_insn (lr, plus_constant (lr, -4));

  if (live_regs_mask)
      saved_regs += bit_count (live_regs_mask) * 4;
      if (optimize_size && !frame_pointer_needed
          && saved_regs == offsets->saved_regs - offsets->saved_args)
          /* If no coprocessor registers are being pushed and we don't have
             to worry about a frame pointer then push extra registers to
             create the stack frame.  This is done in a way that does not
             alter the frame layout, so is independent of the epilogue.  */
          while (n < 8 && (live_regs_mask & (1 << n)) == 0)
          frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
          if (frame && n * 4 >= frame)
              live_regs_mask |= (1 << n) - 1;
              saved_regs += frame;
      insn = emit_multi_reg_push (live_regs_mask);
      RTX_FRAME_RELATED_P (insn) = 1;

  if (! IS_VOLATILE (func_type))
    saved_regs += arm_save_coproc_regs ();

  if (frame_pointer_needed && TARGET_ARM)
      /* Create the new frame pointer.  */
      if (TARGET_APCS_FRAME)
          insn = GEN_INT (-(4 + args_to_push + fp_offset));
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

          if (IS_NESTED (func_type))
              /* Recover the static chain register.  */
              if (!df_regs_ever_live_p (3)
                  || saved_pretend_args)
                insn = gen_rtx_REG (SImode, 3);
              else /* if (crtl->args.pretend_args_size == 0) */
                  insn = plus_constant (hard_frame_pointer_rtx, 4);
                  insn = gen_frame_mem (SImode, insn);
              emit_set_insn (ip_rtx, insn);
              /* Add a USE to stop propagate_one_insn() from barfing.  */
              emit_insn (gen_prologue_use (ip_rtx));
          insn = GEN_INT (saved_regs - 4);
          insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
                                        stack_pointer_rtx, insn));
          RTX_FRAME_RELATED_P (insn) = 1;

  if (offsets->outgoing_args != offsets->saved_args + saved_regs)
      /* This add can produce multiple insns for a large constant, so we
         need to get tricky.  */
      rtx last = get_last_insn ();

      amount = GEN_INT (offsets->saved_args + saved_regs
                        - offsets->outgoing_args);

      insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
          last = last ? NEXT_INSN (last) : get_insns ();
          RTX_FRAME_RELATED_P (last) = 1;
      while (last != insn);

      /* If the frame pointer is needed, emit a special barrier that
         will prevent the scheduler from moving stores to the frame
         before the stack adjustment.  */
      if (frame_pointer_needed)
        insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
                                         hard_frame_pointer_rtx));

  if (frame_pointer_needed && TARGET_THUMB2)
    thumb_set_frame_pointer (offsets);

  if (flag_pic && arm_pic_register != INVALID_REGNUM)
      unsigned long mask;

      mask = live_regs_mask;
      mask &= THUMB2_WORK_REGS;
      if (!IS_NESTED (func_type))
        mask |= (1 << IP_REGNUM);
      arm_load_pic_register (mask);

  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly if the user has requested no
     scheduling in the prolog.  Similarly if we want non-call exceptions
     using the EABI unwinder, to prevent faulting instructions from being
     swapped with a stack adjustment.  */
  if (crtl->profile || !TARGET_SCHED_PROLOG
      || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
    emit_insn (gen_blockage ());

  /* If the link register is being kept alive, with the return address in it,
     then make sure that it does not get reused by the ce2 pass.  */
  if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
    cfun->machine->lr_save_eliminated = 1;
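/* For reference, a typical APCS-frame prologue produced by the code above
   looks like the classic sequence (illustrative only; the exact register
   list depends on live_regs_mask and the frame size):

       mov     ip, sp
       stmfd   sp!, {fp, ip, lr, pc}
       sub     fp, ip, #4
       sub     sp, sp, #<locals + outgoing args>  */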
/* Print condition code to STREAM.  Helper function for arm_print_operand.  */
arm_print_condition (FILE *stream)
  if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
      /* Branch conversion is not implemented for Thumb-2.  */
          output_operand_lossage ("predicated Thumb instruction");
      if (current_insn_predicate != NULL)
          output_operand_lossage
            ("predicated instruction in conditional sequence");

      fputs (arm_condition_codes[arm_current_cc], stream);
  else if (current_insn_predicate)
      enum arm_cond_code code;

          output_operand_lossage ("predicated Thumb instruction");

      code = get_arm_condition_code (current_insn_predicate);
      fputs (arm_condition_codes[code], stream);
/* If CODE is 'd', then the X is a condition operand and the instruction
   should only be executed if the condition is true.
   if CODE is 'D', then the X is a condition operand and the instruction
   should only be executed if the condition is false: however, if the mode
   of the comparison is CCFPEmode, then always execute the instruction -- we
   do this because in these circumstances !GE does not necessarily imply LT;
   in these cases the instruction pattern will take care to make sure that
   an instruction containing %d will follow, thereby undoing the effects of
   doing this instruction unconditionally.
   If CODE is 'N' then X is a floating point operand that must be negated
   If CODE is 'B' then output a bitwise inverted value of X (a const int).
   If X is a REG and CODE is `M', output a ldm/stm style multi-reg.  */
arm_print_operand (FILE *stream, rtx x, int code)
      fputs (ASM_COMMENT_START, stream);

      fputs (user_label_prefix, stream);

      fputs (REGISTER_PREFIX, stream);

      arm_print_condition (stream);

      /* Nothing in unified syntax, otherwise the current condition code.  */
      if (!TARGET_UNIFIED_ASM)
        arm_print_condition (stream);

      /* The current condition code in unified syntax, otherwise nothing.  */
      if (TARGET_UNIFIED_ASM)
        arm_print_condition (stream);

      /* The current condition code for a condition code setting instruction.
         Preceded by 's' in unified syntax, otherwise followed by 's'.  */
      if (TARGET_UNIFIED_ASM)
          fputc('s', stream);
          arm_print_condition (stream);
          arm_print_condition (stream);
          fputc('s', stream);

      /* If the instruction is conditionally executed then print
         the current condition code, otherwise print 's'.  */
      gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
      if (current_insn_predicate)
        arm_print_condition (stream);
        fputc('s', stream);

      /* %# is a "break" sequence.  It doesn't output anything, but is used to
         separate e.g. operand numbers from following text, if that text consists
         of further digits which we don't want to be part of the operand
         number.  */
      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
      r = REAL_VALUE_NEGATE (r);
      fprintf (stream, "%s", fp_const_from_val (&r));

      /* An integer or symbol address without a preceding # sign.  */
      switch (GET_CODE (x))
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          output_addr_const (stream, x);
          gcc_unreachable ();

      if (GET_CODE (x) == CONST_INT)
          val = ARM_SIGN_EXTEND (~INTVAL (x));
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
          putc ('~', stream);
          output_addr_const (stream, x);

      /* The low 16 bits of an immediate constant.  */
      fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);

      fprintf (stream, "%s", arithmetic_instr (x, 1));

      /* Truncate Cirrus shift counts.  */
      if (GET_CODE (x) == CONST_INT)
          fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
      arm_print_operand (stream, x, 0);

      fprintf (stream, "%s", arithmetic_instr (x, 0));

      if (!shift_operator (x, SImode))
          output_operand_lossage ("invalid shift operand");
      shift = shift_op (x, &val);
          fprintf (stream, ", %s ", shift);
          arm_print_operand (stream, XEXP (x, 1), 0);
          fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
      /* An explanation of the 'Q', 'R' and 'H' register operands:

         In a pair of registers containing a DI or DF value the 'Q'
         operand returns the register number of the register containing
         the least significant part of the value.  The 'R' operand returns
         the register number of the register containing the most
         significant part of the value.

         The 'H' operand returns the higher of the two register numbers.
         On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
         same as the 'Q' operand, since the most significant part of the
         value is held in the lower number register.  The reverse is true
         on systems where WORDS_BIG_ENDIAN is false.

         The purpose of these operands is to distinguish between cases
         where the endian-ness of the values is important (for example
         when they are added together), and cases where the endian-ness
         is irrelevant, but the order of register operations is important.
         For example when loading a value from memory into a register
         pair, the endian-ness does not matter.  Provided that the value
         from the lower memory address is put into the lower numbered
         register, and the value from the higher address is put into the
         higher numbered register, the load will work regardless of whether
         the value being loaded is big-wordian or little-wordian.  The
         order of the two register loads can matter however, if the address
         of the memory location is actually held in one of the registers
         being overwritten by the load.  */
      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
          output_operand_lossage ("invalid operand for code '%c'", code);
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
          output_operand_lossage ("invalid operand for code '%c'", code);
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
          output_operand_lossage ("invalid operand for code '%c'", code);
      asm_fprintf (stream, "%r", REGNO (x) + 1);

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
          output_operand_lossage ("invalid operand for code '%c'", code);
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));

      if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
          output_operand_lossage ("invalid operand for code '%c'", code);
      asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));

      asm_fprintf (stream, "%r",
                   GET_CODE (XEXP (x, 0)) == REG
                   ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));

      asm_fprintf (stream, "{%r-%r}",
                   REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
      /* Like 'M', but writing doubleword vector registers, for use by Neon
         insns.  */
        int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
        int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
          asm_fprintf (stream, "{d%d}", regno);
          asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
      if (!COMPARISON_P (x))
          output_operand_lossage ("invalid operand for code '%c'", code);
      fputs (arm_condition_codes[get_arm_condition_code (x)],

      /* CONST_TRUE_RTX means not always -- i.e. never.  We shouldn't ever
         want to do that.  */
      if (x == const_true_rtx)
          output_operand_lossage ("instruction never executed");
      if (!COMPARISON_P (x))
          output_operand_lossage ("invalid operand for code '%c'", code);
      fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
                                 (get_arm_condition_code (x))],

      /* Cirrus registers can be accessed in a variety of ways:
           single floating point (f)
           double floating point (d)
           64bit integer (dx).  */
    case 'W':   /* Cirrus register in F mode.  */
    case 'X':   /* Cirrus register in D mode.  */
    case 'Y':   /* Cirrus register in FX mode.  */
    case 'Z':   /* Cirrus register in DX mode.  */
      gcc_assert (GET_CODE (x) == REG
                  && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
      fprintf (stream, "mv%s%s",
               : code == 'X' ? "d"
               : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);

      /* Print cirrus register in the mode specified by the register's mode.  */
        int mode = GET_MODE (x);

        if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
            output_operand_lossage ("invalid operand for code '%c'", code);
        fprintf (stream, "mv%s%s",
                 mode == DFmode ? "d"
                 : mode == SImode ? "fx"
                 : mode == DImode ? "dx"
                 : "f", reg_names[REGNO (x)] + 2);

      if (GET_CODE (x) != REG
          || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
          || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
        /* Bad value for wCG register number.  */
          output_operand_lossage ("invalid operand for code '%c'", code);
        fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);

      /* Print an iWMMXt control register name.  */
      if (GET_CODE (x) != CONST_INT
          || INTVAL (x) >= 16)
        /* Bad value for wC register number.  */
          output_operand_lossage ("invalid operand for code '%c'", code);
          static const char * wc_reg_names [16] =
              "wCID",  "wCon",  "wCSSF", "wCASF",
              "wC4",   "wC5",   "wC6",   "wC7",
              "wCGR0", "wCGR1", "wCGR2", "wCGR3",
              "wC12",  "wC13",  "wC14",  "wC15"
          fprintf (stream, wc_reg_names [INTVAL (x)]);
      /* Print the high single-precision register of a VFP double-precision
         register.  */
        int mode = GET_MODE (x);

        if (GET_MODE_SIZE (mode) != 8 || GET_CODE (x) != REG)
            output_operand_lossage ("invalid operand for code '%c'", code);
        if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
            output_operand_lossage ("invalid operand for code '%c'", code);
        fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);

      /* Print a VFP/Neon double precision or quad precision register name.  */
        int mode = GET_MODE (x);
        int is_quad = (code == 'q');

        if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
            output_operand_lossage ("invalid operand for code '%c'", code);
        if (GET_CODE (x) != REG
            || !IS_VFP_REGNUM (REGNO (x)))
            output_operand_lossage ("invalid operand for code '%c'", code);
        if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
            || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
            output_operand_lossage ("invalid operand for code '%c'", code);
        fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
                 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));

      /* These two codes print the low/high doubleword register of a Neon quad
         register, respectively.  For pair-structure types, can also print
         low/high quadword registers.  */
        int mode = GET_MODE (x);

        if ((GET_MODE_SIZE (mode) != 16
             && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
            output_operand_lossage ("invalid operand for code '%c'", code);
        if (!NEON_REGNO_OK_FOR_QUAD (regno))
            output_operand_lossage ("invalid operand for code '%c'", code);
        if (GET_MODE_SIZE (mode) == 16)
          fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
                   + (code == 'f' ? 1 : 0));
          fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
                   + (code == 'f' ? 1 : 0));
      /* Print a VFPv3 floating-point constant, represented as an integer
         index.  */
        int index = vfp3_const_double_index (x);
        gcc_assert (index != -1);
        fprintf (stream, "%d", index);

      /* Print bits representing opcode features for Neon.

         Bit 0 is 1 for signed, 0 for unsigned.  Floats count as signed
         and polynomials as unsigned.

         Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.

         Bit 2 is 1 for rounding functions, 0 otherwise.  */

      /* Identify the type as 's', 'u', 'p' or 'f'.  */
          HOST_WIDE_INT bits = INTVAL (x);
          fputc ("uspf"[bits & 3], stream);

      /* Likewise, but signed and unsigned integers are both 'i'.  */
          HOST_WIDE_INT bits = INTVAL (x);
          fputc ("iipf"[bits & 3], stream);

      /* As for 'T', but emit 'u' instead of 'p'.  */
          HOST_WIDE_INT bits = INTVAL (x);
          fputc ("usuf"[bits & 3], stream);

      /* Bit 2: rounding (vs none).  */
          HOST_WIDE_INT bits = INTVAL (x);
          fputs ((bits & 4) != 0 ? "r" : "", stream);

      /* Memory operand for vld1/vst1 instruction.  */
        bool postinc = FALSE;
        gcc_assert (GET_CODE (x) == MEM);
        addr = XEXP (x, 0);
        if (GET_CODE (addr) == POST_INC)
            addr = XEXP (addr, 0);
        asm_fprintf (stream, "[%r]", REGNO (addr));
          fputs("!", stream);

      /* Translate an S register number into a D register number and element index.  */
        int mode = GET_MODE (x);

        if (GET_MODE_SIZE (mode) != 4 || GET_CODE (x) != REG)
            output_operand_lossage ("invalid operand for code '%c'", code);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
            output_operand_lossage ("invalid operand for code '%c'", code);
        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno / 2, regno % 2);

      /* Register specifier for vld1.16/vst1.16.  Translate the S register
         number into a D register number and element index.  */
        int mode = GET_MODE (x);

        if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG)
            output_operand_lossage ("invalid operand for code '%c'", code);
        if (!VFP_REGNO_OK_FOR_SINGLE (regno))
            output_operand_lossage ("invalid operand for code '%c'", code);
        regno = regno - FIRST_VFP_REGNUM;
        fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));

      output_operand_lossage ("missing operand");
      switch (GET_CODE (x))
          asm_fprintf (stream, "%r", REGNO (x));

          output_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));

            real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
                             sizeof (fpstr), 0, 1);
            fprintf (stream, "#%s", fpstr);
            fprintf (stream, "#%s", fp_immediate_constant (x));

          gcc_assert (GET_CODE (x) != NEG);
          fputc ('#', stream);
          if (GET_CODE (x) == HIGH)
              fputs (":lower16:", stream);
          output_addr_const (stream, x);
/* Target hook for assembling integer objects.  The ARM version needs to
   handle word-sized values specially.  */
static bool
arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  enum machine_mode mode;

  if (size == UNITS_PER_WORD && aligned_p)
    {
      fputs ("\t.word\t", asm_out_file);
      output_addr_const (asm_out_file, x);

      /* Mark symbols as position independent.  We only do this in the
	 .text segment, not in the .data segment.  */
      if (NEED_GOT_RELOC && flag_pic && making_const_table &&
	  (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
	{
	  /* See legitimize_pic_address for an explanation of the
	     TARGET_VXWORKS_RTP check.  */
	  if (TARGET_VXWORKS_RTP
	      || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
	    fputs ("(GOT)", asm_out_file);
	  else
	    fputs ("(GOTOFF)", asm_out_file);
	}
      fputc ('\n', asm_out_file);
      return true;
    }

  mode = GET_MODE (x);

  if (arm_vector_mode_supported_p (mode))
    {
      int i, units;

      gcc_assert (GET_CODE (x) == CONST_VECTOR);

      units = CONST_VECTOR_NUNITS (x);
      size = GET_MODE_SIZE (GET_MODE_INNER (mode));

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    assemble_integer
	      (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
	  }
      else
	for (i = 0; i < units; i++)
	  {
	    rtx elt = CONST_VECTOR_ELT (x, i);
	    REAL_VALUE_TYPE rval;

	    REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);

	    assemble_real
	      (rval, GET_MODE_INNER (mode),
	       i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
	  }

      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}
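/* Illustrative note (not from the original source): for a word-sized,
   aligned SYMBOL_REF emitted into the constant table of PIC code, this
   hook prints, e.g.,

	.word	some_symbol(GOTOFF)

   for a local symbol, and "(GOT)" for a non-local one (or always on
   VxWorks RTP).  */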
static void
arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
{
  section *s;

  if (!TARGET_AAPCS_BASED)
    {
      (is_ctor ?
       default_named_section_asm_out_constructor
       : default_named_section_asm_out_destructor) (symbol, priority);
      return;
    }

  /* Put these in the .init_array section, using a special relocation.  */
  if (priority != DEFAULT_INIT_PRIORITY)
    {
      char buf[18];
      sprintf (buf, "%s.%.5u",
	       is_ctor ? ".init_array" : ".fini_array",
	       priority);
      s = get_section (buf, SECTION_WRITE, NULL_TREE);
    }
  else if (is_ctor)
    s = ctors_section;
  else
    s = dtors_section;

  switch_to_section (s);
  assemble_align (POINTER_SIZE);
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, symbol);
  fputs ("(target1)\n", asm_out_file);
}

/* Add a function to the list of static constructors.  */

static void
arm_elf_asm_constructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
}

/* Add a function to the list of static destructors.  */

static void
arm_elf_asm_destructor (rtx symbol, int priority)
{
  arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
}
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The states of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
	  (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
	  (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */
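/* Illustrative example (not from the original source): on ARM the fsm
   turns a short branch-over sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   into the equivalent conditional instruction

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch at the cost of one conditionally executed
   instruction.  */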
/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
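/* Illustrative example (not from the original source): an IT block with
   arm_condexec_masklen == 3 and arm_condexec_mask == 0x5 (first and
   third insns use the block condition, the second its inverse) is
   emitted under an EQ condition as

	itet	eq
	addeq	r0, r0, #1
	subne	r1, r1, #1
	moveq	r2, #0

   i.e. one 't' or 'e' letter per conditionalized instruction.  */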
/* Returns the index of the ARM condition code string in
   `arm_condition_codes'.  COMPARISON should be an rtx like
   `(eq (...) (...))'.  */
static enum arm_cond_code
get_arm_condition_code (rtx comparison)
{
  enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
  enum arm_cond_code code;
  enum rtx_code comp_code = GET_CODE (comparison);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
			   XEXP (comparison, 1));

  switch (mode)
    {
    case CC_DNEmode: code = ARM_NE; goto dominance;
    case CC_DEQmode: code = ARM_EQ; goto dominance;
    case CC_DGEmode: code = ARM_GE; goto dominance;
    case CC_DGTmode: code = ARM_GT; goto dominance;
    case CC_DLEmode: code = ARM_LE; goto dominance;
    case CC_DLTmode: code = ARM_LT; goto dominance;
    case CC_DGEUmode: code = ARM_CS; goto dominance;
    case CC_DGTUmode: code = ARM_HI; goto dominance;
    case CC_DLEUmode: code = ARM_LS; goto dominance;
    case CC_DLTUmode: code = ARM_CC;

    dominance:
      gcc_assert (comp_code == EQ || comp_code == NE);

      if (comp_code == EQ)
	return ARM_INVERSE_CONDITION_CODE (code);
      return code;

    case CC_NOOVmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_PL;
	case LT: return ARM_MI;
	default: gcc_unreachable ();
	}

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	default: gcc_unreachable ();
	}

    case CC_Nmode:
      switch (comp_code)
	{
	case NE: return ARM_MI;
	case EQ: return ARM_PL;
	default: gcc_unreachable ();
	}

    case CCFPEmode:
    case CCFPmode:
      /* These encodings assume that AC=1 in the FPA system control
	 byte.  This allows us to handle all cases except UNEQ and
	 LTGT.  */
      switch (comp_code)
	{
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LS;
	case LT: return ARM_MI;
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case ORDERED: return ARM_VC;
	case UNORDERED: return ARM_VS;
	case UNLT: return ARM_LT;
	case UNLE: return ARM_LE;
	case UNGT: return ARM_HI;
	case UNGE: return ARM_PL;
	/* UNEQ and LTGT do not have a representation.  */
	case UNEQ: /* Fall through.  */
	case LTGT: /* Fall through.  */
	default: gcc_unreachable ();
	}

    case CC_SWPmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_LE;
	case GT: return ARM_LT;
	case LE: return ARM_GE;
	case LT: return ARM_GT;
	case GEU: return ARM_LS;
	case GTU: return ARM_CC;
	case LEU: return ARM_CS;
	case LTU: return ARM_HI;
	default: gcc_unreachable ();
	}

    case CC_Cmode:
      switch (comp_code)
	{
	case LTU: return ARM_CS;
	case GEU: return ARM_CC;
	default: gcc_unreachable ();
	}

    case CCmode:
      switch (comp_code)
	{
	case NE: return ARM_NE;
	case EQ: return ARM_EQ;
	case GE: return ARM_GE;
	case GT: return ARM_GT;
	case LE: return ARM_LE;
	case LT: return ARM_LT;
	case GEU: return ARM_CS;
	case GTU: return ARM_HI;
	case LEU: return ARM_LS;
	case LTU: return ARM_CC;
	default: gcc_unreachable ();
	}

    default: gcc_unreachable ();
    }
}
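/* Illustrative note (not from the original source): for a comparison of
   the form (ge (reg:CC_NOOV ...) (const_int 0)) -- the shape used for
   compares against zero where overflow can be ignored -- this returns
   ARM_PL (the "pl" suffix) rather than ARM_GE, since only the N flag is
   meaningful in that mode.  */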
/* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
   instructions.  */
static void
thumb2_final_prescan_insn (rtx insn)
{
  rtx first_insn = insn;
  rtx body = PATTERN (insn);
  rtx predicate;
  enum arm_cond_code code;
  int n;
  int mask;

  /* Remove the previous insn from the count of insns to be output.  */
  if (arm_condexec_count)
    arm_condexec_count--;

  /* Nothing to do if we are already inside a conditional block.  */
  if (arm_condexec_count)
    return;

  if (GET_CODE (body) != COND_EXEC)
    return;

  /* Conditional jumps are implemented directly.  */
  if (GET_CODE (insn) == JUMP_INSN)
    return;

  predicate = COND_EXEC_TEST (body);
  arm_current_cc = get_arm_condition_code (predicate);

  n = get_attr_ce_count (insn);
  arm_condexec_count = 1;
  arm_condexec_mask = (1 << n) - 1;
  arm_condexec_masklen = n;
  /* See if subsequent instructions can be combined into the same block.  */
  for (;;)
    {
      insn = next_nonnote_insn (insn);

      /* Jumping into the middle of an IT block is illegal, so a label or
	 barrier terminates the block.  */
      if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
	break;

      body = PATTERN (insn);
      /* USE and CLOBBER aren't really insns, so just skip them.  */
      if (GET_CODE (body) == USE
	  || GET_CODE (body) == CLOBBER)
	continue;

      /* ??? Recognize conditional jumps, and combine them with IT blocks.  */
      if (GET_CODE (body) != COND_EXEC)
	break;
      /* Allow up to 4 conditionally executed instructions in a block.  */
      n = get_attr_ce_count (insn);
      if (arm_condexec_masklen + n > 4)
	break;

      predicate = COND_EXEC_TEST (body);
      code = get_arm_condition_code (predicate);
      mask = (1 << n) - 1;
      if (arm_current_cc == code)
	arm_condexec_mask |= (mask << arm_condexec_masklen);
      else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
	break;

      arm_condexec_count++;
      arm_condexec_masklen += n;

      /* A jump must be the last instruction in a conditional block.  */
      if (GET_CODE (insn) == JUMP_INSN)
	break;
    }
  /* Restore recog_data (getting the attributes of other insns can
     destroy this array, but final.c assumes that it remains intact
     across this call).  */
  extract_constrain_insn_cached (first_insn);
}
void
arm_final_prescan_insn (rtx insn)
{
  /* BODY will hold the body of INSN.  */
  rtx body = PATTERN (insn);

  /* This will be 1 if trying to repeat the trick, and things need to be
     reversed if it appears to fail.  */
  int reverse = 0;

  /* If we start with a return insn, we only succeed if we find another one.  */
  int seeking_return = 0;

  /* START_INSN will hold the insn from where we start looking.  This is the
     first insn after the following code_label if REVERSE is true.  */
  rtx start_insn = insn;

  /* If in state 4, check if the target branch is reached, in order to
     change back to state 0.  */
  if (arm_ccfsm_state == 4)
    {
      if (insn == arm_target_insn)
	{
	  arm_target_insn = NULL;
	  arm_ccfsm_state = 0;
	}
      return;
    }

  /* If in state 3, it is possible to repeat the trick, if this insn is an
     unconditional branch to a label, and immediately following this branch
     is the previous target label which is only used once, and the label this
     branch jumps to is not too far off.  */
  if (arm_ccfsm_state == 3)
    {
      if (simplejump_p (insn))
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == BARRIER)
	    {
	      /* XXX Isn't this always a barrier?  */
	      start_insn = next_nonnote_insn (start_insn);
	    }
	  if (GET_CODE (start_insn) == CODE_LABEL
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    reverse = TRUE;
	  else
	    return;
	}
      else if (GET_CODE (body) == RETURN)
	{
	  start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == BARRIER)
	    start_insn = next_nonnote_insn (start_insn);
	  if (GET_CODE (start_insn) == CODE_LABEL
	      && CODE_LABEL_NUMBER (start_insn) == arm_target_label
	      && LABEL_NUSES (start_insn) == 1)
	    {
	      reverse = TRUE;
	      seeking_return = 1;
	    }
	  else
	    return;
	}
      else
	return;
    }

  gcc_assert (!arm_ccfsm_state || reverse);
  if (GET_CODE (insn) != JUMP_INSN)
    return;

  /* This jump might be paralleled with a clobber of the condition codes;
     the jump should always come first.  */
  if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
    body = XVECEXP (body, 0, 0);

  if (reverse
      || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
	  && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
    {
      int insns_skipped;
      int fail = FALSE, succeed = FALSE;
      /* Flag which part of the IF_THEN_ELSE is the LABEL_REF.  */
      int then_not_else = TRUE;
      rtx this_insn = start_insn, label = 0;

      /* Register the insn jumped to.  */
      if (reverse)
	{
	  if (!seeking_return)
	    label = XEXP (SET_SRC (body), 0);
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
	label = XEXP (XEXP (SET_SRC (body), 1), 0);
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
	{
	  label = XEXP (XEXP (SET_SRC (body), 2), 0);
	  then_not_else = FALSE;
	}
      else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
	seeking_return = 1;
      else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
	{
	  seeking_return = 1;
	  then_not_else = FALSE;
	}
      else
	gcc_unreachable ();

      /* See how many insns this branch skips, and what kind of insns.  If all
	 insns are okay, and the label or unconditional branch to the same
	 label is not too far away, succeed.  */
      for (insns_skipped = 0;
	   !fail && !succeed && insns_skipped++ < max_insns_skipped;)
	{
	  rtx scanbody;

	  this_insn = next_nonnote_insn (this_insn);
	  if (!this_insn)
	    break;

	  switch (GET_CODE (this_insn))
	    {
	    case CODE_LABEL:
	      /* Succeed if it is the target label, otherwise fail since
		 control falls in from somewhere else.  */
	      if (this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case BARRIER:
	      /* Succeed if the following insn is the target label.
		 Otherwise fail.
		 If return insns are used then the last insn in a function
		 will be a barrier.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && this_insn == label)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case CALL_INSN:
	      /* The AAPCS says that conditional calls should not be
		 used since they make interworking inefficient (the
		 linker can't transform BL<cond> into BLX).  That's
		 only a problem if the machine has BLX.  */
	      if (arm_arch5)
		{
		  fail = TRUE;
		  break;
		}

	      /* Succeed if the following insn is the target label, or
		 if the following two insns are a barrier and the
		 target label.  */
	      this_insn = next_nonnote_insn (this_insn);
	      if (this_insn && GET_CODE (this_insn) == BARRIER)
		this_insn = next_nonnote_insn (this_insn);

	      if (this_insn && this_insn == label
		  && insns_skipped < max_insns_skipped)
		{
		  arm_ccfsm_state = 1;
		  succeed = TRUE;
		}
	      else
		fail = TRUE;
	      break;

	    case JUMP_INSN:
	      /* If this is an unconditional branch to the same label, succeed.
		 If it is to another label, do nothing.  If it is conditional,
		 fail.  */
	      /* XXX Probably, the tests for SET and the PC are
		 unnecessary.  */

	      scanbody = PATTERN (this_insn);
	      if (GET_CODE (scanbody) == SET
		  && GET_CODE (SET_DEST (scanbody)) == PC)
		{
		  if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
		      && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
		    {
		      arm_ccfsm_state = 2;
		      succeed = TRUE;
		    }
		  else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
		    fail = TRUE;
		}
	      /* Fail if a conditional return is undesirable (e.g. on a
		 StrongARM), but still allow this if optimizing for size.  */
	      else if (GET_CODE (scanbody) == RETURN
		       && !use_return_insn (TRUE, NULL)
		       && !optimize_size)
		fail = TRUE;
	      else if (GET_CODE (scanbody) == RETURN
		       && seeking_return)
		{
		  arm_ccfsm_state = 2;
		  succeed = TRUE;
		}
	      else if (GET_CODE (scanbody) == PARALLEL)
		{
		  switch (get_attr_conds (this_insn))
		    {
		    case CONDS_NOCOND:
		      break;
		    default:
		      fail = TRUE;
		      break;
		    }
		}
	      else
		fail = TRUE;	/* Unrecognized jump (e.g. epilogue).  */

	      break;

	    case INSN:
	      /* Instructions using or affecting the condition codes make it
		 fail.  */
	      scanbody = PATTERN (this_insn);
	      if (!(GET_CODE (scanbody) == SET
		    || GET_CODE (scanbody) == PARALLEL)
		  || get_attr_conds (this_insn) != CONDS_NOCOND)
		fail = TRUE;

	      /* A conditional cirrus instruction must be followed by
		 a non Cirrus instruction.  However, since we
		 conditionalize instructions in this function and by
		 the time we get here we can't add instructions
		 (nops), because shorten_branches() has already been
		 called, we will disable conditionalizing Cirrus
		 instructions to be safe.  */
	      if (GET_CODE (scanbody) != USE
		  && GET_CODE (scanbody) != CLOBBER
		  && get_attr_cirrus (this_insn) != CIRRUS_NOT)
		fail = TRUE;
	      break;

	    default:
	      break;
	    }
	}

      if (succeed)
	{
	  if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
	    arm_target_label = CODE_LABEL_NUMBER (label);
	  else
	    {
	      gcc_assert (seeking_return || arm_ccfsm_state == 2);

	      while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
		{
		  this_insn = next_nonnote_insn (this_insn);
		  gcc_assert (!this_insn
			      || (GET_CODE (this_insn) != BARRIER
				  && GET_CODE (this_insn) != CODE_LABEL));
		}
	      if (!this_insn)
		{
		  /* Oh, dear! we ran off the end.. give up.  */
		  extract_constrain_insn_cached (insn);
		  arm_ccfsm_state = 0;
		  arm_target_insn = NULL;
		  return;
		}
	      arm_target_insn = this_insn;
	    }

	  /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
	     what it was.  */
	  if (!reverse)
	    arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));

	  if (reverse || then_not_else)
	    arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
	}

      /* Restore recog_data (getting the attributes of other insns can
	 destroy this array, but final.c assumes that it remains intact
	 across this call).  */
      extract_constrain_insn_cached (insn);
    }
}
/* Output IT instructions.  */
void
thumb2_asm_output_opcode (FILE * stream)
{
  char buff[5];
  int n;

  if (arm_condexec_mask)
    {
      for (n = 0; n < arm_condexec_masklen; n++)
	buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
      buff[n] = 0;
      asm_fprintf(stream, "i%s\t%s\n\t", buff,
		  arm_condition_codes[arm_current_cc]);
      arm_condexec_mask = 0;
    }
}
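/* Illustrative note (not from the original source): with
   arm_condexec_mask == 0x3 and arm_condexec_masklen == 2 under an EQ
   condition, BUFF becomes "tt" and the emitted prefix is

	itt	eq

   ahead of the two conditionalized instructions.  */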
/* Returns true if REGNO is a valid register
   for holding a quantity of type MODE.  */
int
arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return (regno == CC_REGNUM
	    || (TARGET_HARD_FLOAT && TARGET_VFP
		&& regno == VFPCC_REGNUM));

  if (TARGET_THUMB1)
    /* For the Thumb we only allow values bigger than SImode in
       registers 0 - 6, so that there is always a second low
       register available to hold the upper part of the value.
       We probably ought to ensure that the register is the
       start of an even numbered register pair.  */
    return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);

  if (TARGET_HARD_FLOAT && TARGET_MAVERICK
      && IS_CIRRUS_REGNUM (regno))
    /* We have outlawed SI values in Cirrus registers because they
       reside in the lower 32 bits, but SF values reside in the
       upper 32 bits.  This causes gcc all sorts of grief.  We can't
       even split the registers into pairs because Cirrus SI values
       get sign extended to 64bits-- aldyh.  */
    return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);

  if (TARGET_HARD_FLOAT && TARGET_VFP
      && IS_VFP_REGNUM (regno))
    {
      if (mode == SFmode || mode == SImode)
	return VFP_REGNO_OK_FOR_SINGLE (regno);

      if (mode == DFmode)
	return VFP_REGNO_OK_FOR_DOUBLE (regno);

      /* VFP registers can hold HFmode values, but there is no point in
	 putting them there unless we have hardware conversion insns.  */
      if (mode == HFmode)
	return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);

      if (TARGET_NEON)
	return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
	  || (VALID_NEON_QREG_MODE (mode)
	      && NEON_REGNO_OK_FOR_QUAD (regno))
	  || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
	  || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
	  || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
	  || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
	  || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));

      return FALSE;
    }

  if (TARGET_REALLY_IWMMXT)
    {
      if (IS_IWMMXT_GR_REGNUM (regno))
	return mode == SImode;

      if (IS_IWMMXT_REGNUM (regno))
	return VALID_IWMMXT_REG_MODE (mode);
    }

  /* We allow almost any value to be stored in the general registers.
     Restrict doubleword quantities to even register pairs so that we can
     use ldrd.  Do not allow very large Neon structure opaque modes in
     general registers; they would use too many.  */
  if (regno <= LAST_ARM_REGNUM)
    return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
      && ARM_NUM_REGS (mode) <= 4;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    /* We only allow integers in the fake hard registers.  */
    return GET_MODE_CLASS (mode) == MODE_INT;

  /* The only registers left are the FPA registers
     which we only allow to hold FP values.  */
  return (TARGET_HARD_FLOAT && TARGET_FPA
	  && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && regno >= FIRST_FPA_REGNUM
	  && regno <= LAST_FPA_REGNUM);
}
/* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
   not used in arm mode.  */

enum reg_class
arm_regno_class (int regno)
{
  if (TARGET_THUMB1)
    {
      if (regno == STACK_POINTER_REGNUM)
	return STACK_REG;
      if (regno == CC_REGNUM)
	return CC_REG;
      if (regno < 8)
	return LO_REGS;
      return HI_REGS;
    }

  if (TARGET_THUMB2 && regno < 8)
    return LO_REGS;

  if (   regno <= LAST_ARM_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;

  if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
    return TARGET_THUMB2 ? CC_REG : NO_REGS;

  if (IS_CIRRUS_REGNUM (regno))
    return CIRRUS_REGS;

  if (IS_VFP_REGNUM (regno))
    {
      if (regno <= D7_VFP_REGNUM)
	return VFP_D0_D7_REGS;
      else if (regno <= LAST_LO_VFP_REGNUM)
	return VFP_LO_REGS;
      else
	return VFP_HI_REGS;
    }

  if (IS_IWMMXT_REGNUM (regno))
    return IWMMXT_REGS;

  if (IS_IWMMXT_GR_REGNUM (regno))
    return IWMMXT_GR_REGS;

  return FPA_REGS;
}
/* Handle a special case when computing the offset
   of an argument from the frame pointer.  */
int
arm_debugger_arg_offset (int value, rtx addr)
{
  rtx insn;

  /* We are only interested if dbxout_parms() failed to compute the offset.  */
  if (value != 0)
    return 0;

  /* We can only cope with the case where the address is held in a register.  */
  if (GET_CODE (addr) != REG)
    return 0;

  /* If we are using the frame pointer to point at the argument, then
     an offset of 0 is correct.  */
  if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
    return 0;

  /* If we are using the stack pointer to point at the
     argument, then an offset of 0 is correct.  */
  /* ??? Check this is consistent with thumb2 frame layout.  */
  if ((TARGET_THUMB || !frame_pointer_needed)
      && REGNO (addr) == SP_REGNUM)
    return 0;

  /* Oh dear.  The argument is pointed to by a register rather
     than being held in a register, or being stored at a known
     offset from the frame pointer.  Since GDB only understands
     those two kinds of argument we must translate the address
     held in the register into an offset from the frame pointer.
     We do this by searching through the insns for the function
     looking to see where this register gets its value.  If the
     register is initialized from the frame pointer plus an offset
     then we are in luck and we can continue, otherwise we give up.

     This code is exercised by producing debugging information
     for a function with arguments like this:

	   double func (double a, double b, int c, double d) {return d;}

     Without this code the stab for parameter 'd' will be set to
     an offset of 0 from the frame pointer, rather than 8.  */

  /* The if() statement says:

     If the insn is a normal instruction
     and if the insn is setting the value in a register
     and if the register being set is the register holding the address of the argument
     and if the address is computed by an addition
     that involves adding to a register
     which is the frame pointer
     a constant integer

     then...  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (   GET_CODE (insn) == INSN
	  && GET_CODE (PATTERN (insn)) == SET
	  && REGNO    (XEXP (PATTERN (insn), 0)) == REGNO (addr)
	  && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
	  && REGNO    (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
	  && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
	  )
	{
	  value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));

	  break;
	}
    }

  if (value == 0)
    {
      warning (0, "unable to compute real location of stacked parameter");
      value = 8; /* XXX magic hack */
    }

  return value;
}
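/* Illustrative example (not from the original source): if the insn
   stream contains something like

	(set (reg r4) (plus (reg hard-frame-pointer) (const_int 8)))

   and the argument's address is held in r4, the loop above recovers 8
   as the offset reported to the debugger.  */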
#define def_mbuiltin(MASK, NAME, TYPE, CODE)				\
  do									\
    {									\
      if ((MASK) & insn_flags)						\
	add_builtin_function ((NAME), (TYPE), (CODE),			\
			      BUILT_IN_MD, NULL, NULL_TREE);		\
    }									\
  while (0)

struct builtin_description
{
  const unsigned int       mask;
  const enum insn_code     icode;
  const char * const       name;
  const enum arm_builtins  code;
  const enum rtx_code      comparison;
  const unsigned int       flag;
};

static const struct builtin_description bdesc_2arg[] =
{
#define IWMMXT_BUILTIN(code, string, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
    ARM_BUILTIN_##builtin, UNKNOWN, 0 },
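/* Illustrative note (not from the original source): each IWMMXT_BUILTIN
   entry expands to one builtin_description record; for example
   IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB) becomes

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, UNKNOWN, 0 },

   tying __builtin_arm_waddb to the addv8qi3 insn pattern.  */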
  IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
  IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
  IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
  IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
  IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
  IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
  IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
  IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
  IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
  IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
  IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
  IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
  IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
  IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
  IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
  IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
  IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
  IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
  IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
  IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
  IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
  IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
  IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
  IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
  IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
  IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
  IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
  IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
  IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
  IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
  IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
  IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
  IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
  IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
  IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
  IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
  IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
  IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
  IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
  IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
  IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
  IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
  IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
  IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
  IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
  IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
  IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
  IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
  IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
  IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
  IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
  IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
  IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
  IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
  IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
  IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
  IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)

#define IWMMXT_BUILTIN2(code, builtin) \
  { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, UNKNOWN, 0 },

  IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
  IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
  IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
  IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
  IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
  IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
  IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
  IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
  IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
  IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
  IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
  IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
  IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
  IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
  IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
  IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
  IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
  IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
  IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
  IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
  IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
  IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
  IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
  IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
  IWMMXT_BUILTIN2 (rordi3_di, WRORD)
  IWMMXT_BUILTIN2 (rordi3, WRORDI)
  IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
  IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
};

static const struct builtin_description bdesc_1arg[] =
{
  IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
  IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
  IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
  IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
  IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
  IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
  IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
  IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
};
/* Set up all the iWMMXt builtins.  This is
   not called if TARGET_IWMMXT is zero.  */

static void
arm_init_iwmmxt_builtins (void)
{
  const struct builtin_description * d;
  size_t i;
  tree endlink = void_list_node;

  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);

  tree int_ftype_int
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, integer_type_node, endlink));
  tree v8qi_ftype_v8qi_v8qi_int
    = build_function_type (V8QI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, V8QI_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  tree v4hi_ftype_v4hi_int
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree v2si_ftype_v2si_int
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree v2si_ftype_di_di
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, long_long_integer_type_node,
	  tree_cons (NULL_TREE, long_long_integer_type_node, endlink)));
  tree di_ftype_di_int
    = build_function_type (long_long_integer_type_node,
	tree_cons (NULL_TREE, long_long_integer_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree di_ftype_di_int_int
    = build_function_type (long_long_integer_type_node,
	tree_cons (NULL_TREE, long_long_integer_type_node,
	  tree_cons (NULL_TREE, integer_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  tree int_ftype_v8qi
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V8QI_type_node, endlink));
  tree int_ftype_v4hi
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V4HI_type_node, endlink));
  tree int_ftype_v2si
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree int_ftype_v8qi_int
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree int_ftype_v4hi_int
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree int_ftype_v2si_int
    = build_function_type (integer_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree v8qi_ftype_v8qi_int_int
    = build_function_type (V8QI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, integer_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  tree v4hi_ftype_v4hi_int_int
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, integer_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  tree v2si_ftype_v2si_int_int
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, integer_type_node,
	    tree_cons (NULL_TREE, integer_type_node, endlink))));
  /* Miscellaneous.  */
  tree v8qi_ftype_v4hi_v4hi
    = build_function_type (V8QI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  tree v4hi_ftype_v2si_v2si
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, V2SI_type_node, endlink)));
  tree v2si_ftype_v4hi_v4hi
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  tree v2si_ftype_v8qi_v8qi
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, V8QI_type_node, endlink)));
  tree v4hi_ftype_v4hi_di
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, long_long_integer_type_node, endlink)));
  tree v2si_ftype_v2si_di
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, long_long_integer_type_node, endlink)));
  tree void_ftype_int_int
    = build_function_type (void_type_node,
	tree_cons (NULL_TREE, integer_type_node,
	  tree_cons (NULL_TREE, integer_type_node, endlink)));
  tree di_ftype_void
    = build_function_type (long_long_unsigned_type_node, endlink);
  tree di_ftype_v8qi
    = build_function_type (long_long_integer_type_node,
	tree_cons (NULL_TREE, V8QI_type_node, endlink));
  tree di_ftype_v4hi
    = build_function_type (long_long_integer_type_node,
	tree_cons (NULL_TREE, V4HI_type_node, endlink));
  tree di_ftype_v2si
    = build_function_type (long_long_integer_type_node,
	tree_cons (NULL_TREE, V2SI_type_node, endlink));
  tree v2si_ftype_v4hi
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node, endlink));
  tree v4hi_ftype_v8qi
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node, endlink));
  tree di_ftype_di_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
	tree_cons (NULL_TREE, long_long_unsigned_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node,
	    tree_cons (NULL_TREE, V4HI_type_node, endlink))));
  tree di_ftype_v4hi_v4hi
    = build_function_type (long_long_unsigned_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  /* Normal vector binops.  */
  tree v8qi_ftype_v8qi_v8qi
    = build_function_type (V8QI_type_node,
	tree_cons (NULL_TREE, V8QI_type_node,
	  tree_cons (NULL_TREE, V8QI_type_node, endlink)));
  tree v4hi_ftype_v4hi_v4hi
    = build_function_type (V4HI_type_node,
	tree_cons (NULL_TREE, V4HI_type_node,
	  tree_cons (NULL_TREE, V4HI_type_node, endlink)));
  tree v2si_ftype_v2si_v2si
    = build_function_type (V2SI_type_node,
	tree_cons (NULL_TREE, V2SI_type_node,
	  tree_cons (NULL_TREE, V2SI_type_node, endlink)));
  tree di_ftype_di_di
    = build_function_type (long_long_unsigned_type_node,
	tree_cons (NULL_TREE, long_long_unsigned_type_node,
	  tree_cons (NULL_TREE, long_long_unsigned_type_node, endlink)));
  /* Add all builtins that are more or less simple operations on two
     operands.  */
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      /* Use one of the operands; the target can have a different mode for
	 mask-generating compares.  */
      enum machine_mode mode;
      tree type;

      if (d->name == 0)
	continue;

      mode = insn_data[d->icode].operand[1].mode;

      switch (mode)
	{
	case V8QImode:
	  type = v8qi_ftype_v8qi_v8qi;
	  break;
	case V4HImode:
	  type = v4hi_ftype_v4hi_v4hi;
	  break;
	case V2SImode:
	  type = v2si_ftype_v2si_v2si;
	  break;
	case DImode:
	  type = di_ftype_di_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      def_mbuiltin (d->mask, d->name, type, d->code);
    }

  /* Add the remaining MMX insns with somewhat more complicated types.  */
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);

  def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
  def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
}
static void
arm_init_tls_builtins (void)
{
  tree ftype, decl;

  ftype = build_function_type (ptr_type_node, void_list_node);
  decl = add_builtin_function ("__builtin_thread_pointer", ftype,
			       ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
			       NULL, NULL_TREE);
  TREE_NOTHROW (decl) = 1;
  TREE_READONLY (decl) = 1;
}
enum neon_builtin_type_bits {
  T_V8QI  = 0x0001,
  T_V4HI  = 0x0002,
  T_V2SI  = 0x0004,
  T_V2SF  = 0x0008,
  T_DI    = 0x0010,
  T_V16QI = 0x0020,
  T_V8HI  = 0x0040,
  T_V4SI  = 0x0080,
  T_V4SF  = 0x0100,
  T_V2DI  = 0x0200,
  T_TI    = 0x0400,
  T_EI    = 0x0800,
  T_OI    = 0x1000
};

#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI
#define ti_UP    T_TI
#define ei_UP    T_EI
#define oi_UP    T_OI

#define UP(X) X##_UP

  NEON_LOADSTRUCTLANE,
  NEON_STORESTRUCTLANE,

typedef struct {
  const char *name;
  const neon_itype itype;
  const enum insn_code codes[T_MAX];
  const unsigned int num_vars;
  unsigned int base_fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X

#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0

/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
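/* Illustrative note (not from the original source): an entry such as
   VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) expands to

     "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
     { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
       CODE_FOR_neon_vaddlv2si }, 3, 0

   i.e. one insn code per supported "key" mode of the vaddl builtin.  */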
static neon_builtin_datum neon_builtin_data[] =
{
  { VAR10 (BINOP, vadd,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
  { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
  { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
  { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
  { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
  { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
  { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
  { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
  { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
  { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
  { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
  { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
  { VAR2 (BINOP, vqdmull, v4hi, v2si) },
  { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
  { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR10 (BINOP, vsub,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
  { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
  { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR2 (BINOP, vcage, v2sf, v4sf) },
  { VAR2 (BINOP, vcagt, v2sf, v4sf) },
  { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
  { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
  { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
  { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
  { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
  { VAR2 (BINOP, vrecps, v2sf, v4sf) },
  { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
  { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
  { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  { VAR2 (UNOP, vcnt, v8qi, v16qi) },
  { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
  { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
  { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
  /* FIXME: vget_lane supports more variants than this!  */
  { VAR10 (GETLANE, vget_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (SETLANE, vset_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
  { VAR10 (DUP, vdup_n,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (DUPLANE, vdup_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
  { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
  { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
  { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
  { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
  { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
  { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
  { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
  { VAR10 (BINOP, vext,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
  { VAR2 (UNOP, vrev16, v8qi, v16qi) },
  { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
  { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
  { VAR10 (SELECT, vbsl,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR1 (VTBL, vtbl1, v8qi) },
  { VAR1 (VTBL, vtbl2, v8qi) },
  { VAR1 (VTBL, vtbl3, v8qi) },
  { VAR1 (VTBL, vtbl4, v8qi) },
  { VAR1 (VTBX, vtbx1, v8qi) },
  { VAR1 (VTBX, vtbx2, v8qi) },
  { VAR1 (VTBX, vtbx3, v8qi) },
  { VAR1 (VTBX, vtbx4, v8qi) },
  { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
  { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
  { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1LANE, vld1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (LOAD1, vld1_dup,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1, vst1,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR10 (STORE1LANE, vst1_lane,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
  { VAR9 (LOADSTRUCT,
	  vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst2,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst2_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT,
	  vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst3,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst3_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR9 (LOADSTRUCT, vld4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (LOADSTRUCTLANE, vld4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
  { VAR9 (STORESTRUCT, vst4,
	  v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
  { VAR7 (STORESTRUCTLANE, vst4_lane,
	  v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
  { VAR10 (LOGICBINOP, vand,
	   v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si
, v4sf
, v2di
) },
17366 { VAR10 (LOGICBINOP
, vorr
,
17367 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
17368 { VAR10 (BINOP
, veor
,
17369 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
17370 { VAR10 (LOGICBINOP
, vbic
,
17371 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
17372 { VAR10 (LOGICBINOP
, vorn
,
17373 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) }
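
/* Illustrative note (not in the original table): each VARn entry above
   records one builtin name together with a bit-mask of element types and
   the matching insn codes.  The user-visible names are formed in
   arm_init_neon_builtins below as "__builtin_neon_" plus the entry name
   plus the mode name, so for example { VAR2 (BINOP, vrecps, v2sf, v4sf) }
   ends up registering __builtin_neon_vrecpsv2sf and
   __builtin_neon_vrecpsv4sf.  */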

static void
arm_init_neon_builtins (void)
{
  unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;

  tree neon_intQI_type_node, neon_intHI_type_node;
  tree neon_polyQI_type_node, neon_polyHI_type_node;
  tree neon_intSI_type_node, neon_intDI_type_node, neon_float_type_node;

  tree intQI_pointer_node, intHI_pointer_node, intSI_pointer_node;
  tree intDI_pointer_node, float_pointer_node;

  tree const_intQI_node, const_intHI_node, const_intSI_node;
  tree const_intDI_node, const_float_node;

  tree const_intQI_pointer_node, const_intHI_pointer_node;
  tree const_intSI_pointer_node, const_intDI_pointer_node;
  tree const_float_pointer_node;

  tree V8QI_type_node, V4HI_type_node, V2SI_type_node, V2SF_type_node;
  tree V16QI_type_node, V8HI_type_node, V4SI_type_node, V4SF_type_node;
  tree V2DI_type_node;

  tree intUQI_type_node, intUHI_type_node, intUSI_type_node, intUDI_type_node;

  tree intEI_type_node, intOI_type_node, intCI_type_node, intXI_type_node;

  tree V8QI_pointer_node, V4HI_pointer_node, V2SI_pointer_node;
  tree V2SF_pointer_node, V16QI_pointer_node, V8HI_pointer_node;
  tree V4SI_pointer_node, V4SF_pointer_node, V2DI_pointer_node;

  tree void_ftype_pv8qi_v8qi_v8qi, void_ftype_pv4hi_v4hi_v4hi;
  tree void_ftype_pv2si_v2si_v2si, void_ftype_pv2sf_v2sf_v2sf;
  tree void_ftype_pdi_di_di, void_ftype_pv16qi_v16qi_v16qi;
  tree void_ftype_pv8hi_v8hi_v8hi, void_ftype_pv4si_v4si_v4si;
  tree void_ftype_pv4sf_v4sf_v4sf, void_ftype_pv2di_v2di_v2di;

  tree reinterp_ftype_dreg[5][5], reinterp_ftype_qreg[5][5];
  tree dreg_types[5], qreg_types[5];

  /* Create distinguished type nodes for NEON vector element types,
     and pointers to values of such types, so we can detect them later.  */
  neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
  neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
  neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
  neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
  neon_float_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
  layout_type (neon_float_type_node);

  /* Define typedefs which exactly correspond to the modes we are basing
     vector types on.  If you change these names you'll need to change the
     table used by arm_mangle_type too.  */
  (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
					     "__builtin_neon_qi");
  (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
					     "__builtin_neon_hi");
  (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
					     "__builtin_neon_si");
  (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
					     "__builtin_neon_sf");
  (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
					     "__builtin_neon_di");
  (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
					     "__builtin_neon_poly8");
  (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
					     "__builtin_neon_poly16");

  intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
  intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
  intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
  intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
  float_pointer_node = build_pointer_type (neon_float_type_node);

  /* Next create constant-qualified versions of the above types.  */
  const_intQI_node = build_qualified_type (neon_intQI_type_node,
					   TYPE_QUAL_CONST);
  const_intHI_node = build_qualified_type (neon_intHI_type_node,
					   TYPE_QUAL_CONST);
  const_intSI_node = build_qualified_type (neon_intSI_type_node,
					   TYPE_QUAL_CONST);
  const_intDI_node = build_qualified_type (neon_intDI_type_node,
					   TYPE_QUAL_CONST);
  const_float_node = build_qualified_type (neon_float_type_node,
					   TYPE_QUAL_CONST);

  const_intQI_pointer_node = build_pointer_type (const_intQI_node);
  const_intHI_pointer_node = build_pointer_type (const_intHI_node);
  const_intSI_pointer_node = build_pointer_type (const_intSI_node);
  const_intDI_pointer_node = build_pointer_type (const_intDI_node);
  const_float_pointer_node = build_pointer_type (const_float_node);

  /* Now create vector types based on our NEON element types.  */
  /* 64-bit vectors.  */
  V8QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
  V4HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
  V2SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
  V2SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V2SFmode);
  /* 128-bit vectors.  */
  V16QI_type_node =
    build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
  V8HI_type_node =
    build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
  V4SI_type_node =
    build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
  V4SF_type_node =
    build_vector_type_for_mode (neon_float_type_node, V4SFmode);
  V2DI_type_node =
    build_vector_type_for_mode (neon_intDI_type_node, V2DImode);

  /* Unsigned integer types for various mode sizes.  */
  intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
  intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
  intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
  intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));

  (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
					     "__builtin_neon_uqi");
  (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
					     "__builtin_neon_uhi");
  (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
					     "__builtin_neon_usi");
  (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
					     "__builtin_neon_udi");

  /* Opaque integer types for structures of vectors.  */
  intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
  intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
  intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
  intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));

  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
					     "__builtin_neon_ti");
  (*lang_hooks.types.register_builtin_type) (intEI_type_node,
					     "__builtin_neon_ei");
  (*lang_hooks.types.register_builtin_type) (intOI_type_node,
					     "__builtin_neon_oi");
  (*lang_hooks.types.register_builtin_type) (intCI_type_node,
					     "__builtin_neon_ci");
  (*lang_hooks.types.register_builtin_type) (intXI_type_node,
					     "__builtin_neon_xi");

  /* Pointers to vector types.  */
  V8QI_pointer_node = build_pointer_type (V8QI_type_node);
  V4HI_pointer_node = build_pointer_type (V4HI_type_node);
  V2SI_pointer_node = build_pointer_type (V2SI_type_node);
  V2SF_pointer_node = build_pointer_type (V2SF_type_node);
  V16QI_pointer_node = build_pointer_type (V16QI_type_node);
  V8HI_pointer_node = build_pointer_type (V8HI_type_node);
  V4SI_pointer_node = build_pointer_type (V4SI_type_node);
  V4SF_pointer_node = build_pointer_type (V4SF_type_node);
  V2DI_pointer_node = build_pointer_type (V2DI_type_node);

  /* Operations which return results as pairs.  */
  void_ftype_pv8qi_v8qi_v8qi =
    build_function_type_list (void_type_node, V8QI_pointer_node,
			      V8QI_type_node, V8QI_type_node, NULL);
  void_ftype_pv4hi_v4hi_v4hi =
    build_function_type_list (void_type_node, V4HI_pointer_node,
			      V4HI_type_node, V4HI_type_node, NULL);
  void_ftype_pv2si_v2si_v2si =
    build_function_type_list (void_type_node, V2SI_pointer_node,
			      V2SI_type_node, V2SI_type_node, NULL);
  void_ftype_pv2sf_v2sf_v2sf =
    build_function_type_list (void_type_node, V2SF_pointer_node,
			      V2SF_type_node, V2SF_type_node, NULL);
  void_ftype_pdi_di_di =
    build_function_type_list (void_type_node, intDI_pointer_node,
			      neon_intDI_type_node, neon_intDI_type_node, NULL);
  void_ftype_pv16qi_v16qi_v16qi =
    build_function_type_list (void_type_node, V16QI_pointer_node,
			      V16QI_type_node, V16QI_type_node, NULL);
  void_ftype_pv8hi_v8hi_v8hi =
    build_function_type_list (void_type_node, V8HI_pointer_node,
			      V8HI_type_node, V8HI_type_node, NULL);
  void_ftype_pv4si_v4si_v4si =
    build_function_type_list (void_type_node, V4SI_pointer_node,
			      V4SI_type_node, V4SI_type_node, NULL);
  void_ftype_pv4sf_v4sf_v4sf =
    build_function_type_list (void_type_node, V4SF_pointer_node,
			      V4SF_type_node, V4SF_type_node, NULL);
  void_ftype_pv2di_v2di_v2di =
    build_function_type_list (void_type_node, V2DI_pointer_node,
			      V2DI_type_node, V2DI_type_node, NULL);

  dreg_types[0] = V8QI_type_node;
  dreg_types[1] = V4HI_type_node;
  dreg_types[2] = V2SI_type_node;
  dreg_types[3] = V2SF_type_node;
  dreg_types[4] = neon_intDI_type_node;

  qreg_types[0] = V16QI_type_node;
  qreg_types[1] = V8HI_type_node;
  qreg_types[2] = V4SI_type_node;
  qreg_types[3] = V4SF_type_node;
  qreg_types[4] = V2DI_type_node;

  for (i = 0; i < 5; i++)
    {
      unsigned int j;

      for (j = 0; j < 5; j++)
	{
	  reinterp_ftype_dreg[i][j]
	    = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
	  reinterp_ftype_qreg[i][j]
	    = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
	}
    }

  for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
    {
      neon_builtin_datum *d = &neon_builtin_data[i];
      unsigned int j, codeidx = 0;

      d->base_fcode = fcode;

      for (j = 0; j < T_MAX; j++)
	{
	  const char* const modenames[] = {
	    "v8qi", "v4hi", "v2si", "v2sf", "di",
	    "v16qi", "v8hi", "v4si", "v4sf", "v2di"
	  };
	  char namebuf[60];
	  tree ftype = NULL;
	  enum insn_code icode;
	  int is_load = 0, is_store = 0;

	  if ((d->bits & (1 << j)) == 0)
	    continue;

	  icode = d->codes[codeidx++];

	  switch (d->itype)
	    {
	    case NEON_LOAD1: case NEON_LOAD1LANE:
	    case NEON_LOADSTRUCT: case NEON_LOADSTRUCTLANE:
	      is_load = 1;
	      /* Fall through.  */
	    case NEON_STORE1: case NEON_STORE1LANE:
	    case NEON_STORESTRUCT: case NEON_STORESTRUCTLANE:
	      if (!is_load)
		is_store = 1;
	      /* Fall through.  */
	    case NEON_UNOP: case NEON_BINOP: case NEON_TERNOP:
	    case NEON_LOGICBINOP: case NEON_SHIFTINSERT:
	    case NEON_GETLANE: case NEON_SETLANE: case NEON_CREATE:
	    case NEON_DUP: case NEON_DUPLANE:
	    case NEON_SHIFTIMM: case NEON_SHIFTACC:
	    case NEON_COMBINE: case NEON_SPLIT:
	    case NEON_CONVERT: case NEON_FIXCONV:
	    case NEON_LANEMUL: case NEON_LANEMULL: case NEON_LANEMULH:
	    case NEON_LANEMAC:
	    case NEON_SCALARMUL: case NEON_SCALARMULL: case NEON_SCALARMULH:
	    case NEON_SCALARMAC:
	    case NEON_SELECT: case NEON_VTBL: case NEON_VTBX:
	      {
		int k;
		tree return_type = void_type_node, args = void_list_node;

		/* Build a function type directly from the insn_data for this
		   builtin.  The build_function_type () function takes care of
		   removing duplicates for us.  */
		for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
		  {
		    tree eltype;

		    if (is_load && k == 1)
		      {
			/* Neon load patterns always have the memory operand
			   (a SImode pointer) in the operand 1 position.  We
			   want a const pointer to the element type in that
			   position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI: case T_V16QI:
			    eltype = const_intQI_pointer_node; break;
			  case T_V4HI: case T_V8HI:
			    eltype = const_intHI_pointer_node; break;
			  case T_V2SI: case T_V4SI:
			    eltype = const_intSI_pointer_node; break;
			  case T_V2SF: case T_V4SF:
			    eltype = const_float_pointer_node; break;
			  case T_DI: case T_V2DI:
			    eltype = const_intDI_pointer_node; break;
			  default: gcc_unreachable ();
			  }
		      }
		    else if (is_store && k == 0)
		      {
			/* Similarly, Neon store patterns use operand 0 as
			   the memory location to store to (a SImode pointer).
			   Use a pointer to the element type of the store in
			   that position.  */
			gcc_assert (insn_data[icode].operand[k].mode == SImode);

			switch (1 << j)
			  {
			  case T_V8QI: case T_V16QI:
			    eltype = intQI_pointer_node; break;
			  case T_V4HI: case T_V8HI:
			    eltype = intHI_pointer_node; break;
			  case T_V2SI: case T_V4SI:
			    eltype = intSI_pointer_node; break;
			  case T_V2SF: case T_V4SF:
			    eltype = float_pointer_node; break;
			  case T_DI: case T_V2DI:
			    eltype = intDI_pointer_node; break;
			  default: gcc_unreachable ();
			  }
		      }
		    else
		      {
			switch (insn_data[icode].operand[k].mode)
			  {
			  case VOIDmode: eltype = void_type_node; break;
			  /* Scalars.  */
			  case QImode: eltype = neon_intQI_type_node; break;
			  case HImode: eltype = neon_intHI_type_node; break;
			  case SImode: eltype = neon_intSI_type_node; break;
			  case SFmode: eltype = neon_float_type_node; break;
			  case DImode: eltype = neon_intDI_type_node; break;
			  case TImode: eltype = intTI_type_node; break;
			  case EImode: eltype = intEI_type_node; break;
			  case OImode: eltype = intOI_type_node; break;
			  case CImode: eltype = intCI_type_node; break;
			  case XImode: eltype = intXI_type_node; break;
			  /* 64-bit vectors.  */
			  case V8QImode: eltype = V8QI_type_node; break;
			  case V4HImode: eltype = V4HI_type_node; break;
			  case V2SImode: eltype = V2SI_type_node; break;
			  case V2SFmode: eltype = V2SF_type_node; break;
			  /* 128-bit vectors.  */
			  case V16QImode: eltype = V16QI_type_node; break;
			  case V8HImode: eltype = V8HI_type_node; break;
			  case V4SImode: eltype = V4SI_type_node; break;
			  case V4SFmode: eltype = V4SF_type_node; break;
			  case V2DImode: eltype = V2DI_type_node; break;
			  default: gcc_unreachable ();
			  }
		      }

		    if (k == 0 && !is_store)
		      return_type = eltype;
		    else
		      args = tree_cons (NULL_TREE, eltype, args);
		  }

		ftype = build_function_type (return_type, args);
	      }
	      break;

	    case NEON_RESULTPAIR:
	      {
		switch (insn_data[icode].operand[1].mode)
		  {
		  case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
		  case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
		  case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
		  case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
		  case DImode: ftype = void_ftype_pdi_di_di; break;
		  case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
		  case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
		  case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
		  case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
		  case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    case NEON_REINTERP:
	      {
		/* We iterate over 5 doubleword types, then 5 quadword
		   types.  */
		int rhs = j % 5;
		switch (insn_data[icode].operand[0].mode)
		  {
		  case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
		  case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
		  case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
		  case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
		  case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
		  case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
		  case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
		  case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
		  case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
		  case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
		  default: gcc_unreachable ();
		  }
	      }
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  gcc_assert (ftype != NULL);

	  sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);

	  add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
				NULL_TREE);
	}
    }
}

static void
arm_init_fp16_builtins (void)
{
  tree fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fp16_type) = 16;
  layout_type (fp16_type);
  (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16");
}

static void
arm_init_builtins (void)
{
  arm_init_tls_builtins ();

  if (TARGET_REALLY_IWMMXT)
    arm_init_iwmmxt_builtins ();

  if (TARGET_NEON)
    arm_init_neon_builtins ();

  if (arm_fp16_format)
    arm_init_fp16_builtins ();
}

/* Implement TARGET_INVALID_PARAMETER_TYPE.  */
static const char *
arm_invalid_parameter_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("function parameters cannot have __fp16 type");
  return NULL;
}

/* Implement TARGET_INVALID_RETURN_TYPE.  */
static const char *
arm_invalid_return_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return N_("functions cannot return __fp16 type");
  return NULL;
}

/* Implement TARGET_PROMOTED_TYPE.  */
static tree
arm_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}

/* Implement TARGET_CONVERT_TO_TYPE.
   Specifically, this hook implements the peculiarity of the ARM
   half-precision floating-point C semantics that requires conversions
   between __fp16 to or from double to do an intermediate conversion to
   float.  */
static tree
arm_convert_to_type (tree type, tree expr)
{
  tree fromtype = TREE_TYPE (expr);
  if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
    return NULL_TREE;
  if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
      || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
    return convert (type, convert (float_type_node, expr));
  return NULL_TREE;
}
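
/* Illustrative sketch (not part of the hook itself): given the rule above,
   a source-level conversion such as

     __fp16 h = ...;
     double d = h;

   is expanded as if it were written (double) (float) h, i.e. the __fp16
   value is widened to float first and only then to double; conversions in
   the opposite direction narrow through float in the same way.  */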

/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
   This simply adds HFmode as a supported mode; even though we don't
   implement arithmetic on this type directly, it's supported by
   optabs conversions, much the way the double-word arithmetic is
   special-cased in the default hook.  */
static bool
arm_scalar_mode_supported_p (enum machine_mode mode)
{
  if (mode == HFmode)
    return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
  else
    return default_scalar_mode_supported_p (mode);
}

/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x != const0_rtx)
    return x;
  x = gen_reg_rtx (mode);

  emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
			       : gen_rtx_SUBREG (DImode, x, 0)));
  return x;
}

/* Subroutine of arm_expand_builtin to take care of binop insns.  */
static rtx
arm_expand_binop_builtin (enum insn_code icode,
			  tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

/* Subroutine of arm_expand_builtin to take care of unop insns.  */
static rtx
arm_expand_unop_builtin (enum insn_code icode,
			 tree exp, rtx target, int do_load)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);
  if (do_load)
    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
  else
    {
      if (VECTOR_MODE_P (mode0))
	op0 = safe_vector_operand (op0, mode0);

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);
    }

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}

static int
neon_builtin_compare (const void *a, const void *b)
{
  const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
  const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
  unsigned int soughtcode = key->base_fcode;

  if (soughtcode >= memb->base_fcode
      && soughtcode < memb->base_fcode + memb->num_vars)
    return 0;
  else if (soughtcode < memb->base_fcode)
    return -1;
  else
    return 1;
}

static enum insn_code
locate_neon_builtin_icode (int fcode, neon_itype *itype)
{
  neon_builtin_datum key, *found;
  int idx;

  key.base_fcode = fcode;
  found = (neon_builtin_datum *)
    bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
	     sizeof (neon_builtin_data[0]), neon_builtin_compare);
  gcc_assert (found);
  idx = fcode - (int) found->base_fcode;
  gcc_assert (idx >= 0 && idx < T_MAX && idx < (int) found->num_vars);

  if (itype)
    *itype = found->itype;

  return found->codes[idx];
}

/* Kinds of argument handling used when expanding a Neon builtin call.  */
typedef enum {
  NEON_ARG_COPY_TO_REG,
  NEON_ARG_CONSTANT,
  NEON_ARG_STOP
} builtin_arg;

#define NEON_MAX_BUILTIN_ARGS 5
18106 /* Expand a Neon builtin. */
18108 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
18113 tree arg
[NEON_MAX_BUILTIN_ARGS
];
18114 rtx op
[NEON_MAX_BUILTIN_ARGS
];
18115 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
18116 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
18121 || GET_MODE (target
) != tmode
18122 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
18123 target
= gen_reg_rtx (tmode
);
18125 va_start (ap
, exp
);
18129 builtin_arg thisarg
= (builtin_arg
) va_arg (ap
, int);
18131 if (thisarg
== NEON_ARG_STOP
)
18135 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
18136 op
[argc
] = expand_normal (arg
[argc
]);
18137 mode
[argc
] = insn_data
[icode
].operand
[argc
+ have_retval
].mode
;
18141 case NEON_ARG_COPY_TO_REG
:
18142 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
18143 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
18144 (op
[argc
], mode
[argc
]))
18145 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
18148 case NEON_ARG_CONSTANT
:
18149 /* FIXME: This error message is somewhat unhelpful. */
18150 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
18151 (op
[argc
], mode
[argc
]))
18152 error ("argument must be a constant");
18155 case NEON_ARG_STOP
:
18156 gcc_unreachable ();
18169 pat
= GEN_FCN (icode
) (target
, op
[0]);
18173 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
18177 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
18181 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
18185 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
18189 gcc_unreachable ();
18195 pat
= GEN_FCN (icode
) (op
[0]);
18199 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
18203 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
18207 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
18211 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
18215 gcc_unreachable ();
18226 /* Expand a Neon builtin. These are "special" because they don't have symbolic
18227 constants defined per-instruction or per instruction-variant. Instead, the
18228 required info is looked up in the table neon_builtin_data. */
18230 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
18233 enum insn_code icode
= locate_neon_builtin_icode (fcode
, &itype
);
18240 return arm_expand_neon_args (target
, icode
, 1, exp
,
18241 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
18245 case NEON_SCALARMUL
:
18246 case NEON_SCALARMULL
:
18247 case NEON_SCALARMULH
:
18248 case NEON_SHIFTINSERT
:
18249 case NEON_LOGICBINOP
:
18250 return arm_expand_neon_args (target
, icode
, 1, exp
,
18251 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
18255 return arm_expand_neon_args (target
, icode
, 1, exp
,
18256 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
18257 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
18261 case NEON_SHIFTIMM
:
18262 return arm_expand_neon_args (target
, icode
, 1, exp
,
18263 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
18267 return arm_expand_neon_args (target
, icode
, 1, exp
,
18268 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
18272 case NEON_REINTERP
:
18273 return arm_expand_neon_args (target
, icode
, 1, exp
,
18274 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
18278 return arm_expand_neon_args (target
, icode
, 1, exp
,
18279 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
18281 case NEON_RESULTPAIR
:
18282 return arm_expand_neon_args (target
, icode
, 0, exp
,
18283 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
18287 case NEON_LANEMULL
:
18288 case NEON_LANEMULH
:
18289 return arm_expand_neon_args (target
, icode
, 1, exp
,
18290 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
18291 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
18294 return arm_expand_neon_args (target
, icode
, 1, exp
,
18295 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
18296 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
18298 case NEON_SHIFTACC
:
18299 return arm_expand_neon_args (target
, icode
, 1, exp
,
18300 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
18301 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
18303 case NEON_SCALARMAC
:
18304 return arm_expand_neon_args (target
, icode
, 1, exp
,
18305 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
18306 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
18310 return arm_expand_neon_args (target
, icode
, 1, exp
,
18311 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
18315 case NEON_LOADSTRUCT
:
18316 return arm_expand_neon_args (target
, icode
, 1, exp
,
18317 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
18319 case NEON_LOAD1LANE
:
18320 case NEON_LOADSTRUCTLANE
:
18321 return arm_expand_neon_args (target
, icode
, 1, exp
,
18322 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
18326 case NEON_STORESTRUCT
:
18327 return arm_expand_neon_args (target
, icode
, 0, exp
,
18328 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
18330 case NEON_STORE1LANE
:
18331 case NEON_STORESTRUCTLANE
:
18332 return arm_expand_neon_args (target
, icode
, 0, exp
,
18333 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
18337 gcc_unreachable ();

/* Emit code to reinterpret one Neon type as another, without altering
   bits.  */
void
neon_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
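
/* Illustrative only: a hypothetical call such as

     neon_reinterpret (dest_v4hi_reg, src_v8qi_reg);

   simply emits a move of the same underlying bits into the destination
   register in its new mode; no conversion instructions are generated.  */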
18347 /* Emit code to place a Neon pair result in memory locations (with equal
18350 neon_emit_pair_result_insn (enum machine_mode mode
,
18351 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
18354 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
18355 rtx tmp1
= gen_reg_rtx (mode
);
18356 rtx tmp2
= gen_reg_rtx (mode
);
18358 emit_insn (intfn (tmp1
, op1
, tmp2
, op2
));
18360 emit_move_insn (mem
, tmp1
);
18361 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
18362 emit_move_insn (mem
, tmp2
);
18365 /* Set up operands for a register copy from src to dest, taking care not to
18366 clobber registers in the process.
18367 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
18368 be called with a large N, so that should be OK. */
18371 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
18373 unsigned int copied
= 0, opctr
= 0;
18374 unsigned int done
= (1 << count
) - 1;
18377 while (copied
!= done
)
18379 for (i
= 0; i
< count
; i
++)
18383 for (j
= 0; good
&& j
< count
; j
++)
18384 if (i
!= j
&& (copied
& (1 << j
)) == 0
18385 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
18390 operands
[opctr
++] = dest
[i
];
18391 operands
[opctr
++] = src
[i
];
18397 gcc_assert (opctr
== count
* 2);
18400 /* Expand an expression EXP that calls a built-in function,
18401 with result going to TARGET if that's convenient
18402 (and in mode MODE if that's convenient).
18403 SUBTARGET may be used as the target for computing one of EXP's operands.
18404 IGNORE is nonzero if the value is to be ignored. */
18407 arm_expand_builtin (tree exp
,
18409 rtx subtarget ATTRIBUTE_UNUSED
,
18410 enum machine_mode mode ATTRIBUTE_UNUSED
,
18411 int ignore ATTRIBUTE_UNUSED
)
18413 const struct builtin_description
* d
;
18414 enum insn_code icode
;
18415 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
18423 int fcode
= DECL_FUNCTION_CODE (fndecl
);
18425 enum machine_mode tmode
;
18426 enum machine_mode mode0
;
18427 enum machine_mode mode1
;
18428 enum machine_mode mode2
;
18430 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
18431 return arm_expand_neon_builtin (fcode
, exp
, target
);
18435 case ARM_BUILTIN_TEXTRMSB
:
18436 case ARM_BUILTIN_TEXTRMUB
:
18437 case ARM_BUILTIN_TEXTRMSH
:
18438 case ARM_BUILTIN_TEXTRMUH
:
18439 case ARM_BUILTIN_TEXTRMSW
:
18440 case ARM_BUILTIN_TEXTRMUW
:
18441 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
18442 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
18443 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
18444 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
18445 : CODE_FOR_iwmmxt_textrmw
);
18447 arg0
= CALL_EXPR_ARG (exp
, 0);
18448 arg1
= CALL_EXPR_ARG (exp
, 1);
18449 op0
= expand_normal (arg0
);
18450 op1
= expand_normal (arg1
);
18451 tmode
= insn_data
[icode
].operand
[0].mode
;
18452 mode0
= insn_data
[icode
].operand
[1].mode
;
18453 mode1
= insn_data
[icode
].operand
[2].mode
;
18455 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18456 op0
= copy_to_mode_reg (mode0
, op0
);
18457 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18459 /* @@@ better error message */
18460 error ("selector must be an immediate");
18461 return gen_reg_rtx (tmode
);
18464 || GET_MODE (target
) != tmode
18465 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18466 target
= gen_reg_rtx (tmode
);
18467 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18473 case ARM_BUILTIN_TINSRB
:
18474 case ARM_BUILTIN_TINSRH
:
18475 case ARM_BUILTIN_TINSRW
:
18476 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
18477 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
18478 : CODE_FOR_iwmmxt_tinsrw
);
18479 arg0
= CALL_EXPR_ARG (exp
, 0);
18480 arg1
= CALL_EXPR_ARG (exp
, 1);
18481 arg2
= CALL_EXPR_ARG (exp
, 2);
18482 op0
= expand_normal (arg0
);
18483 op1
= expand_normal (arg1
);
18484 op2
= expand_normal (arg2
);
18485 tmode
= insn_data
[icode
].operand
[0].mode
;
18486 mode0
= insn_data
[icode
].operand
[1].mode
;
18487 mode1
= insn_data
[icode
].operand
[2].mode
;
18488 mode2
= insn_data
[icode
].operand
[3].mode
;
18490 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18491 op0
= copy_to_mode_reg (mode0
, op0
);
18492 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18493 op1
= copy_to_mode_reg (mode1
, op1
);
18494 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18496 /* @@@ better error message */
18497 error ("selector must be an immediate");
18501 || GET_MODE (target
) != tmode
18502 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18503 target
= gen_reg_rtx (tmode
);
18504 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18510 case ARM_BUILTIN_SETWCX
:
18511 arg0
= CALL_EXPR_ARG (exp
, 0);
18512 arg1
= CALL_EXPR_ARG (exp
, 1);
18513 op0
= force_reg (SImode
, expand_normal (arg0
));
18514 op1
= expand_normal (arg1
);
18515 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
18518 case ARM_BUILTIN_GETWCX
:
18519 arg0
= CALL_EXPR_ARG (exp
, 0);
18520 op0
= expand_normal (arg0
);
18521 target
= gen_reg_rtx (SImode
);
18522 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
18525 case ARM_BUILTIN_WSHUFH
:
18526 icode
= CODE_FOR_iwmmxt_wshufh
;
18527 arg0
= CALL_EXPR_ARG (exp
, 0);
18528 arg1
= CALL_EXPR_ARG (exp
, 1);
18529 op0
= expand_normal (arg0
);
18530 op1
= expand_normal (arg1
);
18531 tmode
= insn_data
[icode
].operand
[0].mode
;
18532 mode1
= insn_data
[icode
].operand
[1].mode
;
18533 mode2
= insn_data
[icode
].operand
[2].mode
;
18535 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
18536 op0
= copy_to_mode_reg (mode1
, op0
);
18537 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
18539 /* @@@ better error message */
18540 error ("mask must be an immediate");
18544 || GET_MODE (target
) != tmode
18545 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18546 target
= gen_reg_rtx (tmode
);
18547 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
18553 case ARM_BUILTIN_WSADB
:
18554 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
18555 case ARM_BUILTIN_WSADH
:
18556 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
18557 case ARM_BUILTIN_WSADBZ
:
18558 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
18559 case ARM_BUILTIN_WSADHZ
:
18560 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
18562 /* Several three-argument builtins. */
18563 case ARM_BUILTIN_WMACS
:
18564 case ARM_BUILTIN_WMACU
:
18565 case ARM_BUILTIN_WALIGN
:
18566 case ARM_BUILTIN_TMIA
:
18567 case ARM_BUILTIN_TMIAPH
:
18568 case ARM_BUILTIN_TMIATT
:
18569 case ARM_BUILTIN_TMIATB
:
18570 case ARM_BUILTIN_TMIABT
:
18571 case ARM_BUILTIN_TMIABB
:
18572 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
18573 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
18574 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
18575 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
18576 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
18577 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
18578 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
18579 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
18580 : CODE_FOR_iwmmxt_walign
);
18581 arg0
= CALL_EXPR_ARG (exp
, 0);
18582 arg1
= CALL_EXPR_ARG (exp
, 1);
18583 arg2
= CALL_EXPR_ARG (exp
, 2);
18584 op0
= expand_normal (arg0
);
18585 op1
= expand_normal (arg1
);
18586 op2
= expand_normal (arg2
);
18587 tmode
= insn_data
[icode
].operand
[0].mode
;
18588 mode0
= insn_data
[icode
].operand
[1].mode
;
18589 mode1
= insn_data
[icode
].operand
[2].mode
;
18590 mode2
= insn_data
[icode
].operand
[3].mode
;
18592 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
18593 op0
= copy_to_mode_reg (mode0
, op0
);
18594 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
18595 op1
= copy_to_mode_reg (mode1
, op1
);
18596 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
18597 op2
= copy_to_mode_reg (mode2
, op2
);
18599 || GET_MODE (target
) != tmode
18600 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
18601 target
= gen_reg_rtx (tmode
);
18602 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
18608 case ARM_BUILTIN_WZERO
:
18609 target
= gen_reg_rtx (DImode
);
18610 emit_insn (gen_iwmmxt_clrdi (target
));
18613 case ARM_BUILTIN_THREAD_POINTER
:
18614 return arm_load_tp (target
);
18620 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
18621 if (d
->code
== (const enum arm_builtins
) fcode
)
18622 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
18624 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
18625 if (d
->code
== (const enum arm_builtins
) fcode
)
18626 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
18628 /* @@@ Should really do something sensible here. */
18632 /* Return the number (counting from 0) of
18633 the least significant set bit in MASK. */
18636 number_of_first_bit_set (unsigned mask
)
18641 (mask
& (1 << bit
)) == 0;
18648 /* Emit code to push or pop registers to or from the stack. F is the
18649 assembly file. MASK is the registers to push or pop. PUSH is
18650 nonzero if we should push, and zero if we should pop. For debugging
18651 output, if pushing, adjust CFA_OFFSET by the amount of space added
18652 to the stack. REAL_REGS should have the same number of bits set as
18653 MASK, and will be used instead (in the same order) to describe which
18654 registers were saved - this is used to mark the save slots when we
18655 push high registers after moving them to low registers. */
18657 thumb_pushpop (FILE *f
, unsigned long mask
, int push
, int *cfa_offset
,
18658 unsigned long real_regs
)
18661 int lo_mask
= mask
& 0xFF;
18662 int pushed_words
= 0;
18666 if (lo_mask
== 0 && !push
&& (mask
& (1 << PC_REGNUM
)))
18668 /* Special case. Do not generate a POP PC statement here, do it in
18670 thumb_exit (f
, -1);
18674 if (ARM_EABI_UNWIND_TABLES
&& push
)
18676 fprintf (f
, "\t.save\t{");
18677 for (regno
= 0; regno
< 15; regno
++)
18679 if (real_regs
& (1 << regno
))
18681 if (real_regs
& ((1 << regno
) -1))
18683 asm_fprintf (f
, "%r", regno
);
18686 fprintf (f
, "}\n");
18689 fprintf (f
, "\t%s\t{", push
? "push" : "pop");
18691 /* Look at the low registers first. */
18692 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
18696 asm_fprintf (f
, "%r", regno
);
18698 if ((lo_mask
& ~1) != 0)
18705 if (push
&& (mask
& (1 << LR_REGNUM
)))
18707 /* Catch pushing the LR. */
18711 asm_fprintf (f
, "%r", LR_REGNUM
);
18715 else if (!push
&& (mask
& (1 << PC_REGNUM
)))
18717 /* Catch popping the PC. */
18718 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
18719 || crtl
->calls_eh_return
)
18721 /* The PC is never poped directly, instead
18722 it is popped into r3 and then BX is used. */
18723 fprintf (f
, "}\n");
18725 thumb_exit (f
, -1);
18734 asm_fprintf (f
, "%r", PC_REGNUM
);
18738 fprintf (f
, "}\n");
18740 if (push
&& pushed_words
&& dwarf2out_do_frame ())
18742 char *l
= dwarf2out_cfi_label (false);
18743 int pushed_mask
= real_regs
;
18745 *cfa_offset
+= pushed_words
* 4;
18746 dwarf2out_def_cfa (l
, SP_REGNUM
, *cfa_offset
);
18749 pushed_mask
= real_regs
;
18750 for (regno
= 0; regno
<= 14; regno
++, pushed_mask
>>= 1)
18752 if (pushed_mask
& 1)
18753 dwarf2out_reg_save (l
, regno
, 4 * pushed_words
++ - *cfa_offset
);
18758 /* Generate code to return from a thumb function.
18759 If 'reg_containing_return_addr' is -1, then the return address is
18760 actually on the stack, at the stack pointer. */
18762 thumb_exit (FILE *f
, int reg_containing_return_addr
)
18764 unsigned regs_available_for_popping
;
18765 unsigned regs_to_pop
;
18767 unsigned available
;
18771 int restore_a4
= FALSE
;
18773 /* Compute the registers we need to pop. */
18777 if (reg_containing_return_addr
== -1)
18779 regs_to_pop
|= 1 << LR_REGNUM
;
18783 if (TARGET_BACKTRACE
)
18785 /* Restore the (ARM) frame pointer and stack pointer. */
18786 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
18790 /* If there is nothing to pop then just emit the BX instruction and
18792 if (pops_needed
== 0)
18794 if (crtl
->calls_eh_return
)
18795 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
18797 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
18800 /* Otherwise if we are not supporting interworking and we have not created
18801 a backtrace structure and the function was not entered in ARM mode then
18802 just pop the return address straight into the PC. */
18803 else if (!TARGET_INTERWORK
18804 && !TARGET_BACKTRACE
18805 && !is_called_in_ARM_mode (current_function_decl
)
18806 && !crtl
->calls_eh_return
)
18808 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
18812 /* Find out how many of the (return) argument registers we can corrupt. */
18813 regs_available_for_popping
= 0;
18815 /* If returning via __builtin_eh_return, the bottom three registers
18816 all contain information needed for the return. */
18817 if (crtl
->calls_eh_return
)
18821 /* If we can deduce the registers used from the function's
18822 return value. This is more reliable that examining
18823 df_regs_ever_live_p () because that will be set if the register is
18824 ever used in the function, not just if the register is used
18825 to hold a return value. */
18827 if (crtl
->return_rtx
!= 0)
18828 mode
= GET_MODE (crtl
->return_rtx
);
18830 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
18832 size
= GET_MODE_SIZE (mode
);
18836 /* In a void function we can use any argument register.
18837 In a function that returns a structure on the stack
18838 we can use the second and third argument registers. */
18839 if (mode
== VOIDmode
)
18840 regs_available_for_popping
=
18841 (1 << ARG_REGISTER (1))
18842 | (1 << ARG_REGISTER (2))
18843 | (1 << ARG_REGISTER (3));
18845 regs_available_for_popping
=
18846 (1 << ARG_REGISTER (2))
18847 | (1 << ARG_REGISTER (3));
18849 else if (size
<= 4)
18850 regs_available_for_popping
=
18851 (1 << ARG_REGISTER (2))
18852 | (1 << ARG_REGISTER (3));
18853 else if (size
<= 8)
18854 regs_available_for_popping
=
18855 (1 << ARG_REGISTER (3));
18858 /* Match registers to be popped with registers into which we pop them. */
18859 for (available
= regs_available_for_popping
,
18860 required
= regs_to_pop
;
18861 required
!= 0 && available
!= 0;
18862 available
&= ~(available
& - available
),
18863 required
&= ~(required
& - required
))
18866 /* If we have any popping registers left over, remove them. */
18868 regs_available_for_popping
&= ~available
;
18870 /* Otherwise if we need another popping register we can use
18871 the fourth argument register. */
18872 else if (pops_needed
)
18874 /* If we have not found any free argument registers and
18875 reg a4 contains the return address, we must move it. */
18876 if (regs_available_for_popping
== 0
18877 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
18879 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
18880 reg_containing_return_addr
= LR_REGNUM
;
18882 else if (size
> 12)
18884 /* Register a4 is being used to hold part of the return value,
18885 but we have dire need of a free, low register. */
18888 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
18891 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
18893 /* The fourth argument register is available. */
18894 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
18900 /* Pop as many registers as we can. */
18901 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
18902 regs_available_for_popping
);
18904 /* Process the registers we popped. */
18905 if (reg_containing_return_addr
== -1)
18907 /* The return address was popped into the lowest numbered register. */
18908 regs_to_pop
&= ~(1 << LR_REGNUM
);
18910 reg_containing_return_addr
=
18911 number_of_first_bit_set (regs_available_for_popping
);
18913 /* Remove this register for the mask of available registers, so that
18914 the return address will not be corrupted by further pops. */
18915 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
18918 /* If we popped other registers then handle them here. */
18919 if (regs_available_for_popping
)
18923 /* Work out which register currently contains the frame pointer. */
18924 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
18926 /* Move it into the correct place. */
18927 asm_fprintf (f
, "\tmov\t%r, %r\n",
18928 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
18930 /* (Temporarily) remove it from the mask of popped registers. */
18931 regs_available_for_popping
&= ~(1 << frame_pointer
);
18932 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
18934 if (regs_available_for_popping
)
18938 /* We popped the stack pointer as well,
18939 find the register that contains it. */
18940 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
18942 /* Move it into the stack register. */
18943 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
18945 /* At this point we have popped all necessary registers, so
18946 do not worry about restoring regs_available_for_popping
18947 to its correct value:
18949 assert (pops_needed == 0)
18950 assert (regs_available_for_popping == (1 << frame_pointer))
18951 assert (regs_to_pop == (1 << STACK_POINTER)) */
18955 /* Since we have just move the popped value into the frame
18956 pointer, the popping register is available for reuse, and
18957 we know that we still have the stack pointer left to pop. */
18958 regs_available_for_popping
|= (1 << frame_pointer
);
18962 /* If we still have registers left on the stack, but we no longer have
18963 any registers into which we can pop them, then we must move the return
18964 address into the link register and make available the register that
18966 if (regs_available_for_popping
== 0 && pops_needed
> 0)
18968 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
18970 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
18971 reg_containing_return_addr
);
18973 reg_containing_return_addr
= LR_REGNUM
;
18976 /* If we have registers left on the stack then pop some more.
18977 We know that at most we will want to pop FP and SP. */
18978 if (pops_needed
> 0)
18983 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
18984 regs_available_for_popping
);
18986 /* We have popped either FP or SP.
18987 Move whichever one it is into the correct register. */
18988 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
18989 move_to
= number_of_first_bit_set (regs_to_pop
);
18991 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
18993 regs_to_pop
&= ~(1 << move_to
);
18998 /* If we still have not popped everything then we must have only
18999 had one register available to us and we are now popping the SP. */
19000 if (pops_needed
> 0)
19004 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
19005 regs_available_for_popping
);
19007 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
19009 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
19011 assert (regs_to_pop == (1 << STACK_POINTER))
19012 assert (pops_needed == 1)
19016 /* If necessary restore the a4 register. */
19019 if (reg_containing_return_addr
!= LR_REGNUM
)
19021 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
19022 reg_containing_return_addr
= LR_REGNUM
;
19025 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
19028 if (crtl
->calls_eh_return
)
19029 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
19031 /* Return to caller. */
19032 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
19037 thumb1_final_prescan_insn (rtx insn
)
19039 if (flag_print_asm_name
)
19040 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
19041 INSN_ADDRESSES (INSN_UID (insn
)));
19045 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
19047 unsigned HOST_WIDE_INT mask
= 0xff;
19050 val
= val
& (unsigned HOST_WIDE_INT
)0xffffffffu
;
19051 if (val
== 0) /* XXX */
19054 for (i
= 0; i
< 25; i
++)
19055 if ((val
& (mask
<< i
)) == val
)
19061 /* Returns nonzero if the current function contains,
19062 or might contain a far jump. */
19064 thumb_far_jump_used_p (void)
19068 /* This test is only important for leaf functions. */
19069 /* assert (!leaf_function_p ()); */
19071 /* If we have already decided that far jumps may be used,
19072 do not bother checking again, and always return true even if
19073 it turns out that they are not being used. Once we have made
19074 the decision that far jumps are present (and that hence the link
19075 register will be pushed onto the stack) we cannot go back on it. */
19076 if (cfun
->machine
->far_jump_used
)
19079 /* If this function is not being called from the prologue/epilogue
19080 generation code then it must be being called from the
19081 INITIAL_ELIMINATION_OFFSET macro. */
19082 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
19084 /* In this case we know that we are being asked about the elimination
19085 of the arg pointer register. If that register is not being used,
19086 then there are no arguments on the stack, and we do not have to
19087 worry that a far jump might force the prologue to push the link
19088 register, changing the stack offsets. In this case we can just
19089 return false, since the presence of far jumps in the function will
19090 not affect stack offsets.
19092 If the arg pointer is live (or if it was live, but has now been
19093 eliminated and so set to dead) then we do have to test to see if
19094 the function might contain a far jump. This test can lead to some
19095 false negatives, since before reload is completed, then length of
19096 branch instructions is not known, so gcc defaults to returning their
19097 longest length, which in turn sets the far jump attribute to true.
19099 A false negative will not result in bad code being generated, but it
19100 will result in a needless push and pop of the link register. We
19101 hope that this does not occur too often.
19103 If we need doubleword stack alignment this could affect the other
19104 elimination offsets so we can't risk getting it wrong. */
19105 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
19106 cfun
->machine
->arg_pointer_live
= 1;
19107 else if (!cfun
->machine
->arg_pointer_live
)
19111 /* Check to see if the function contains a branch
19112 insn with the far jump attribute set. */
19113 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
19115 if (GET_CODE (insn
) == JUMP_INSN
19116 /* Ignore tablejump patterns. */
19117 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
19118 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
19119 && get_attr_far_jump (insn
) == FAR_JUMP_YES
19122 /* Record the fact that we have decided that
19123 the function does use far jumps. */
19124 cfun
->machine
->far_jump_used
= 1;
19132 /* Return nonzero if FUNC must be entered in ARM mode. */
19134 is_called_in_ARM_mode (tree func
)
19136 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
19138 /* Ignore the problem about functions whose address is taken. */
19139 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
19143 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
19149 /* The bits which aren't usefully expanded as rtl. */
19151 thumb_unexpanded_epilogue (void)
19153 arm_stack_offsets
*offsets
;
19155 unsigned long live_regs_mask
= 0;
19156 int high_regs_pushed
= 0;
19157 int had_to_push_lr
;
19160 if (cfun
->machine
->return_used_this_function
!= 0)
19163 if (IS_NAKED (arm_current_func_type ()))
19166 offsets
= arm_get_frame_offsets ();
19167 live_regs_mask
= offsets
->saved_regs_mask
;
19168 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
19170 /* If we can deduce the registers used from the function's return value.
19171 This is more reliable that examining df_regs_ever_live_p () because that
19172 will be set if the register is ever used in the function, not just if
19173 the register is used to hold a return value. */
19174 size
= arm_size_return_regs ();
19176 /* The prolog may have pushed some high registers to use as
19177 work registers. e.g. the testsuite file:
19178 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19179 compiles to produce:
19180 push {r4, r5, r6, r7, lr}
19184 as part of the prolog. We have to undo that pushing here. */
19186 if (high_regs_pushed
)
19188 unsigned long mask
= live_regs_mask
& 0xff;
19191 /* The available low registers depend on the size of the value we are
19199 /* Oh dear! We have no low registers into which we can pop
19202 ("no low registers available for popping high registers");
19204 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
19205 if (live_regs_mask
& (1 << next_hi_reg
))
19208 while (high_regs_pushed
)
19210 /* Find lo register(s) into which the high register(s) can
19212 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
19214 if (mask
& (1 << regno
))
19215 high_regs_pushed
--;
19216 if (high_regs_pushed
== 0)
19220 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
19222 /* Pop the values into the low register(s). */
19223 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
19225 /* Move the value(s) into the high registers. */
19226 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
19228 if (mask
& (1 << regno
))
19230 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
19233 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
19234 if (live_regs_mask
& (1 << next_hi_reg
))
19239 live_regs_mask
&= ~0x0f00;
19242 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
19243 live_regs_mask
&= 0xff;
19245 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
19247 /* Pop the return address into the PC. */
19248 if (had_to_push_lr
)
19249 live_regs_mask
|= 1 << PC_REGNUM
;
19251 /* Either no argument registers were pushed or a backtrace
19252 structure was created which includes an adjusted stack
19253 pointer, so just pop everything. */
19254 if (live_regs_mask
)
19255 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
      /* We have either just popped the return address into the
         PC or it was kept in LR for the entire function.  */
19260 if (!had_to_push_lr
)
19261 thumb_exit (asm_out_file
, LR_REGNUM
);
19265 /* Pop everything but the return address. */
19266 if (live_regs_mask
)
19267 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
19270 if (had_to_push_lr
)
19274 /* We have no free low regs, so save one. */
19275 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
19279 /* Get the return address into a temporary register. */
19280 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
19281 1 << LAST_ARG_REGNUM
);
19285 /* Move the return address to lr. */
19286 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
19288 /* Restore the low register. */
19289 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
19294 regno
= LAST_ARG_REGNUM
;
19299 /* Remove the argument registers that were pushed onto the stack. */
19300 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
19301 SP_REGNUM
, SP_REGNUM
,
19302 crtl
->args
.pretend_args_size
);
19304 thumb_exit (asm_out_file
, regno
);
/* Functions to save and restore machine-specific function data.  */
static struct machine_function *
arm_init_machine_status (void)
{
  struct machine_function *machine;
  machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));

#if ARM_FT_UNKNOWN != 0
  machine->func_type = ARM_FT_UNKNOWN;
#endif
  return machine;
}
/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
/* Do anything needed before RTL is emitted for each function.  */
void
arm_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = arm_init_machine_status;

  /* This is to stop the combine pass optimizing away the alignment
     adjustment of va_arg.  */
  /* ??? It is claimed that this should not be necessary.  */
  if (cfun)
    mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
}
/* Like arm_compute_initial_elimination_offset.  Simpler because there
   isn't an ABI specified frame pointer for Thumb.  Instead, we set it
   to point at the base of the local variables after static stack
   space for a function has been allocated.  */
HOST_WIDE_INT
thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
{
  arm_stack_offsets *offsets;

  offsets = arm_get_frame_offsets ();

  switch (from)
    {
    case ARG_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->saved_args;

        case FRAME_POINTER_REGNUM:
          return offsets->soft_frame - offsets->saved_args;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->saved_args;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->saved_args;

        default:
          gcc_unreachable ();
        }
      break;

    case FRAME_POINTER_REGNUM:
      switch (to)
        {
        case STACK_POINTER_REGNUM:
          return offsets->outgoing_args - offsets->soft_frame;

        case ARM_HARD_FRAME_POINTER_REGNUM:
          return offsets->saved_regs - offsets->soft_frame;

        case THUMB_HARD_FRAME_POINTER_REGNUM:
          return offsets->locals_base - offsets->soft_frame;

        default:
          gcc_unreachable ();
        }
      break;

    default:
      gcc_unreachable ();
    }
}
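/* Worked example (illustrative only, values assumed, not from the original
   source): with 16 bytes of pushed registers and 24 bytes of locals,
   saved_args = 0, saved_regs = 16, locals_base = 16 and outgoing_args = 40,
   so eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM above yields 40
   (the whole frame), while eliminating it into
   THUMB_HARD_FRAME_POINTER_REGNUM yields 16 (just the register save area).  */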
19405 /* Generate the rest of a function's prologue. */
19407 thumb1_expand_prologue (void)
19411 HOST_WIDE_INT amount
;
19412 arm_stack_offsets
*offsets
;
19413 unsigned long func_type
;
19415 unsigned long live_regs_mask
;
19417 func_type
= arm_current_func_type ();
19419 /* Naked functions don't have prologues. */
19420 if (IS_NAKED (func_type
))
19423 if (IS_INTERRUPT (func_type
))
      error ("interrupt service routines cannot be coded in Thumb mode");
19429 offsets
= arm_get_frame_offsets ();
19430 live_regs_mask
= offsets
->saved_regs_mask
;
19431 /* Load the pic register before setting the frame pointer,
19432 so we can use r7 as a temporary work register. */
19433 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
19434 arm_load_pic_register (live_regs_mask
);
19436 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
19437 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
19438 stack_pointer_rtx
);
19440 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
19445 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
19446 GEN_INT (- amount
)));
19447 RTX_FRAME_RELATED_P (insn
) = 1;
19453 /* The stack decrement is too big for an immediate value in a single
19454 insn. In theory we could issue multiple subtracts, but after
19455 three of them it becomes more space efficient to place the full
19456 value in the constant pool and load into a register. (Also the
19457 ARM debugger really likes to see only one stack decrement per
19458 function). So instead we look for a scratch register into which
19459 we can load the decrement, and then we subtract this from the
19460 stack pointer. Unfortunately on the thumb the only available
19461 scratch registers are the argument registers, and we cannot use
19462 these as they may hold arguments to the function. Instead we
19463 attempt to locate a call preserved register which is used by this
19464 function. If we can find one, then we know that it will have
         been pushed at the start of the prologue and so we can corrupt
         it now.  */
19467 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
19468 if (live_regs_mask
& (1 << regno
))
19471 gcc_assert(regno
<= LAST_LO_REGNUM
);
19473 reg
= gen_rtx_REG (SImode
, regno
);
19475 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
19477 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
19478 stack_pointer_rtx
, reg
));
19479 RTX_FRAME_RELATED_P (insn
) = 1;
19480 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
19481 plus_constant (stack_pointer_rtx
,
19483 RTX_FRAME_RELATED_P (dwarf
) = 1;
19484 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, dwarf
);
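          /* Illustrative sketch (assumed values, not from the original
             source): for a frame needing a 2048-byte decrement with r4 among
             the saved registers, the RTL emitted above might assemble to
             roughly
                 push   {r4, lr}
                 ldr    r4, .Lc          @ .Lc: .word -2048
                 add    sp, sp, r4
             with the REG_FRAME_RELATED_EXPR note describing the net
             sp = sp - 2048 adjustment to the DWARF machinery.  */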
19488 if (frame_pointer_needed
)
19489 thumb_set_frame_pointer (offsets
);
19491 /* If we are profiling, make sure no instructions are scheduled before
19492 the call to mcount. Similarly if the user has requested no
19493 scheduling in the prolog. Similarly if we want non-call exceptions
19494 using the EABI unwinder, to prevent faulting instructions from being
19495 swapped with a stack adjustment. */
19496 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
19497 || (ARM_EABI_UNWIND_TABLES
&& flag_non_call_exceptions
))
19498 emit_insn (gen_blockage ());
19500 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
19501 if (live_regs_mask
& 0xff)
19502 cfun
->machine
->lr_save_eliminated
= 0;
}

void
thumb1_expand_epilogue (void)
{
  HOST_WIDE_INT amount;
  arm_stack_offsets *offsets;
  int regno;

  /* Naked functions don't have epilogues.  */
  if (IS_NAKED (arm_current_func_type ()))
    return;

  offsets = arm_get_frame_offsets ();
  amount = offsets->outgoing_args - offsets->saved_regs;

  if (frame_pointer_needed)
    {
      emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
      amount = offsets->locals_base - offsets->saved_regs;
    }

  gcc_assert (amount >= 0);
  if (amount)
    {
      if (amount < 512)
        emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (amount)));
      else
        {
          /* r3 is always free in the epilogue.  */
          rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);

          emit_insn (gen_movsi (reg, GEN_INT (amount)));
          emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
        }
    }

  /* Emit a USE (stack_pointer_rtx), so that
     the stack adjustment will not be deleted.  */
  emit_insn (gen_prologue_use (stack_pointer_rtx));

  if (crtl->profile || !TARGET_SCHED_PROLOG)
    emit_insn (gen_blockage ());

  /* Emit a clobber for each insn that will be restored in the epilogue,
     so that flow2 will get register lifetimes correct.  */
  for (regno = 0; regno < 13; regno++)
    if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      emit_clobber (gen_rtx_REG (SImode, regno));

  if (! df_regs_ever_live_p (LR_REGNUM))
    emit_use (gen_rtx_REG (SImode, LR_REGNUM));
}
19560 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
19562 arm_stack_offsets
*offsets
;
19563 unsigned long live_regs_mask
= 0;
19564 unsigned long l_mask
;
19565 unsigned high_regs_pushed
= 0;
19566 int cfa_offset
= 0;
19569 if (IS_NAKED (arm_current_func_type ()))
19572 if (is_called_in_ARM_mode (current_function_decl
))
19576 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
19577 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
19579 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
19581 /* Generate code sequence to switch us into Thumb mode. */
19582 /* The .code 32 directive has already been emitted by
19583 ASM_DECLARE_FUNCTION_NAME. */
19584 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
19585 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
19587 /* Generate a label, so that the debugger will notice the
19588 change in instruction sets. This label is also used by
19589 the assembler to bypass the ARM code when this function
19590 is called from a Thumb encoded function elsewhere in the
19591 same file. Hence the definition of STUB_NAME here must
19592 agree with the definition in gas/config/tc-arm.c. */
19594 #define STUB_NAME ".real_start_of"
19596 fprintf (f
, "\t.code\t16\n");
19598 if (arm_dllexport_name_p (name
))
19599 name
= arm_strip_name_encoding (name
);
19601 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
19602 fprintf (f
, "\t.thumb_func\n");
19603 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
19606 if (crtl
->args
.pretend_args_size
)
19608 /* Output unwind directive for the stack adjustment. */
19609 if (ARM_EABI_UNWIND_TABLES
)
19610 fprintf (f
, "\t.pad #%d\n",
19611 crtl
->args
.pretend_args_size
);
19613 if (cfun
->machine
->uses_anonymous_args
)
19617 fprintf (f
, "\tpush\t{");
19619 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
19621 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
19622 regno
<= LAST_ARG_REGNUM
;
19624 asm_fprintf (f
, "%r%s", regno
,
19625 regno
== LAST_ARG_REGNUM
? "" : ", ");
19627 fprintf (f
, "}\n");
19630 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
19631 SP_REGNUM
, SP_REGNUM
,
19632 crtl
->args
.pretend_args_size
);
19634 /* We don't need to record the stores for unwinding (would it
19635 help the debugger any if we did?), but record the change in
19636 the stack pointer. */
19637 if (dwarf2out_do_frame ())
19639 char *l
= dwarf2out_cfi_label (false);
19641 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
19642 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
19646 /* Get the registers we are going to push. */
19647 offsets
= arm_get_frame_offsets ();
19648 live_regs_mask
= offsets
->saved_regs_mask
;
19649 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19650 l_mask
= live_regs_mask
& 0x40ff;
19651 /* Then count how many other high registers will need to be pushed. */
19652 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
19654 if (TARGET_BACKTRACE
)
19657 unsigned work_register
;
19659 /* We have been asked to create a stack backtrace structure.
19660 The code looks like this:
19664 0 sub SP, #16 Reserve space for 4 registers.
19665 2 push {R7} Push low registers.
19666 4 add R7, SP, #20 Get the stack pointer before the push.
19667 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
19668 8 mov R7, PC Get hold of the start of this code plus 12.
19669 10 str R7, [SP, #16] Store it.
19670 12 mov R7, FP Get hold of the current frame pointer.
19671 14 str R7, [SP, #4] Store it.
19672 16 mov R7, LR Get hold of the current return address.
19673 18 str R7, [SP, #12] Store it.
19674 20 add R7, SP, #16 Point at the start of the backtrace structure.
19675 22 mov FP, R7 Put this value into the frame pointer. */
19677 work_register
= thumb_find_work_register (live_regs_mask
);
19679 if (ARM_EABI_UNWIND_TABLES
)
19680 asm_fprintf (f
, "\t.pad #16\n");
19683 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19684 SP_REGNUM
, SP_REGNUM
);
19686 if (dwarf2out_do_frame ())
19688 char *l
= dwarf2out_cfi_label (false);
19690 cfa_offset
= cfa_offset
+ 16;
19691 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
19696 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
19697 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
19702 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
19703 offset
+ 16 + crtl
->args
.pretend_args_size
);
19705 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19708 /* Make sure that the instruction fetching the PC is in the right place
19709 to calculate "start of backtrace creation code + 12". */
19712 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
19713 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19715 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
19716 ARM_HARD_FRAME_POINTER_REGNUM
);
19717 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19722 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
19723 ARM_HARD_FRAME_POINTER_REGNUM
);
19724 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19726 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
19727 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19731 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
19732 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
19734 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
19736 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
19737 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
19739 /* Optimization: If we are not pushing any low registers but we are going
19740 to push some high registers then delay our first push. This will just
     be a push of LR and we can combine it with the push of the first high
     register.  */
19743 else if ((l_mask
& 0xff) != 0
19744 || (high_regs_pushed
== 0 && l_mask
))
19745 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
19747 if (high_regs_pushed
)
19749 unsigned pushable_regs
;
19750 unsigned next_hi_reg
;
19752 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
19753 if (live_regs_mask
& (1 << next_hi_reg
))
19756 pushable_regs
= l_mask
& 0xff;
19758 if (pushable_regs
== 0)
19759 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
19761 while (high_regs_pushed
> 0)
19763 unsigned long real_regs_mask
= 0;
19765 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
19767 if (pushable_regs
& (1 << regno
))
19769 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
19771 high_regs_pushed
--;
19772 real_regs_mask
|= (1 << next_hi_reg
);
19774 if (high_regs_pushed
)
19776 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
19778 if (live_regs_mask
& (1 << next_hi_reg
))
19783 pushable_regs
&= ~((1 << regno
) - 1);
19789 /* If we had to find a work register and we have not yet
19790 saved the LR then add it to the list of regs to push. */
19791 if (l_mask
== (1 << LR_REGNUM
))
19793 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
19795 real_regs_mask
| (1 << LR_REGNUM
));
19799 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
19804 /* Handle the case of a double word load into a low register from
19805 a computed memory address. The computed address may involve a
19806 register which is overwritten by the load. */
19808 thumb_load_double_from_address (rtx
*operands
)
19816 gcc_assert (GET_CODE (operands
[0]) == REG
);
19817 gcc_assert (GET_CODE (operands
[1]) == MEM
);
19819 /* Get the memory address. */
19820 addr
= XEXP (operands
[1], 0);
19822 /* Work out how the memory address is computed. */
19823 switch (GET_CODE (addr
))
19826 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19828 if (REGNO (operands
[0]) == REGNO (addr
))
19830 output_asm_insn ("ldr\t%H0, %2", operands
);
19831 output_asm_insn ("ldr\t%0, %1", operands
);
19835 output_asm_insn ("ldr\t%0, %1", operands
);
19836 output_asm_insn ("ldr\t%H0, %2", operands
);
19841 /* Compute <address> + 4 for the high order load. */
19842 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19844 output_asm_insn ("ldr\t%0, %1", operands
);
19845 output_asm_insn ("ldr\t%H0, %2", operands
);
19849 arg1
= XEXP (addr
, 0);
19850 arg2
= XEXP (addr
, 1);
19852 if (CONSTANT_P (arg1
))
19853 base
= arg2
, offset
= arg1
;
19855 base
= arg1
, offset
= arg2
;
19857 gcc_assert (GET_CODE (base
) == REG
);
19859 /* Catch the case of <address> = <reg> + <reg> */
19860 if (GET_CODE (offset
) == REG
)
19862 int reg_offset
= REGNO (offset
);
19863 int reg_base
= REGNO (base
);
19864 int reg_dest
= REGNO (operands
[0]);
19866 /* Add the base and offset registers together into the
19867 higher destination register. */
19868 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
19869 reg_dest
+ 1, reg_base
, reg_offset
);
19871 /* Load the lower destination register from the address in
19872 the higher destination register. */
19873 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
19874 reg_dest
, reg_dest
+ 1);
          /* Load the higher destination register from its own address
             plus 4.  */
19878 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
19879 reg_dest
+ 1, reg_dest
+ 1);
19883 /* Compute <address> + 4 for the high order load. */
19884 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19886 /* If the computed address is held in the low order register
19887 then load the high order register first, otherwise always
19888 load the low order register first. */
19889 if (REGNO (operands
[0]) == REGNO (base
))
19891 output_asm_insn ("ldr\t%H0, %2", operands
);
19892 output_asm_insn ("ldr\t%0, %1", operands
);
19896 output_asm_insn ("ldr\t%0, %1", operands
);
19897 output_asm_insn ("ldr\t%H0, %2", operands
);
      /* With no registers to worry about we can just load the value
         directly.  */
19905 operands
[2] = adjust_address (operands
[1], SImode
, 4);
19907 output_asm_insn ("ldr\t%H0, %2", operands
);
19908 output_asm_insn ("ldr\t%0, %1", operands
);
19912 gcc_unreachable ();
const char *
thumb_output_move_mem_multiple (int n, rtx *operands)
{
  rtx tmp;

  switch (n)
    {
    case 2:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
      break;

    case 3:
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      if (REGNO (operands[5]) > REGNO (operands[6]))
        {
          tmp = operands[5];
          operands[5] = operands[6];
          operands[6] = tmp;
        }
      if (REGNO (operands[4]) > REGNO (operands[5]))
        {
          tmp = operands[4];
          operands[4] = operands[5];
          operands[5] = tmp;
        }
      output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
      output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
      break;

    default:
      gcc_unreachable ();
    }

  return "";
}
/* Output a call-via instruction for thumb state.  */
const char *
thumb_call_via_reg (rtx reg)
{
  int regno = REGNO (reg);
  rtx *labelp;

  gcc_assert (regno < LR_REGNUM);

  /* If we are in the normal text section we can use a single instance
     per compilation unit.  If we are doing function sections, then we need
     an entry per section, since we can't rely on reachability.  */
  if (in_section == text_section)
    {
      thumb_call_reg_needed = 1;

      if (thumb_call_via_label[regno] == NULL)
        thumb_call_via_label[regno] = gen_label_rtx ();
      labelp = thumb_call_via_label + regno;
    }
  else
    {
      if (cfun->machine->call_via[regno] == NULL)
        cfun->machine->call_via[regno] = gen_label_rtx ();
      labelp = cfun->machine->call_via + regno;
    }

  output_asm_insn ("bl\t%a0", labelp);
  return "";
}
/* Routines for generating rtl.  */
void
thumb_expand_movmemqi (rtx *operands)
{
  rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
  rtx in  = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
  HOST_WIDE_INT len = INTVAL (operands[2]);
  HOST_WIDE_INT offset = 0;

  while (len >= 12)
    {
      emit_insn (gen_movmem12b (out, in, out, in));
      len -= 12;
    }

  if (len >= 8)
    {
      emit_insn (gen_movmem8b (out, in, out, in));
      len -= 8;
    }

  if (len >= 4)
    {
      rtx reg = gen_reg_rtx (SImode);
      emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
      emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
      len -= 4;
      offset += 4;
    }

  if (len >= 2)
    {
      rtx reg = gen_reg_rtx (HImode);
      emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
                            reg));
      len -= 2;
      offset += 2;
    }

  if (len)
    {
      rtx reg = gen_reg_rtx (QImode);
      emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
                                              plus_constant (in, offset))));
      emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
                            reg));
    }
}
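/* Illustrative example (not from the original source): a 27-byte copy is
   expanded by the code above as two 12-byte block moves (movmem12b) followed
   by a halfword and then a byte move for the 3-byte tail, with OFFSET
   tracking how far the tail accesses are from the (already advanced)
   block-move pointers.  */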
void
thumb_reload_out_hi (rtx *operands)
{
  emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
}

/* Handle reading a half-word from memory during reload.  */
void
thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
/* Return the length of a function name prefix
   that starts with the character 'c'.  */
static int
arm_get_strip_length (int c)
{
  switch (c)
    {
    ARM_NAME_ENCODING_LENGTHS
      default: return 0;
    }
}

/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}

/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
20109 arm_file_start (void)
20113 if (TARGET_UNIFIED_ASM
)
20114 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
20118 const char *fpu_name
;
20119 if (arm_select
[0].string
)
20120 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_select
[0].string
);
20121 else if (arm_select
[1].string
)
20122 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_select
[1].string
);
20124 asm_fprintf (asm_out_file
, "\t.cpu %s\n",
20125 all_cores
[arm_default_cpu
].name
);
20127 if (TARGET_SOFT_FLOAT
)
20130 fpu_name
= "softvfp";
20132 fpu_name
= "softfpa";
20136 fpu_name
= arm_fpu_desc
->name
;
20137 if (arm_fpu_desc
->model
== ARM_FP_MODEL_VFP
)
20139 if (TARGET_HARD_FLOAT
)
20140 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
20141 if (TARGET_HARD_FLOAT_ABI
)
20142 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
20145 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
20147 /* Some of these attributes only apply when the corresponding features
20148 are used. However we don't have any easy way of figuring this out.
20149 Conservatively record the setting that would have been used. */
20151 /* Tag_ABI_FP_rounding. */
20152 if (flag_rounding_math
)
20153 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
20154 if (!flag_unsafe_math_optimizations
)
      /* Tag_ABI_FP_denormal.  */
20157 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
20158 /* Tag_ABI_FP_exceptions. */
20159 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
20161 /* Tag_ABI_FP_user_exceptions. */
20162 if (flag_signaling_nans
)
20163 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
20164 /* Tag_ABI_FP_number_model. */
20165 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
20166 flag_finite_math_only
? 1 : 3);
20168 /* Tag_ABI_align8_needed. */
20169 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
20170 /* Tag_ABI_align8_preserved. */
20171 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
20172 /* Tag_ABI_enum_size. */
20173 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
20174 flag_short_enums
? 1 : 2);
20176 /* Tag_ABI_optimization_goals. */
20179 else if (optimize
>= 2)
20185 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
20187 /* Tag_ABI_FP_16bit_format. */
20188 if (arm_fp16_format
)
20189 asm_fprintf (asm_out_file
, "\t.eabi_attribute 38, %d\n",
20190 (int)arm_fp16_format
);
20192 if (arm_lang_output_object_attributes_hook
)
20193 arm_lang_output_object_attributes_hook();
20195 default_file_start();
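/* Illustrative example (assumed flags, not from the original source): an
   EABI compile with -O2 -mcpu=cortex-a8 -mfpu=vfp would typically begin the
   assembly file with directives along the lines of
        .cpu cortex-a8
        .fpu vfp
        .eabi_attribute 20, 1   @ Tag_ABI_FP_denormal
        .eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions
        .eabi_attribute 23, 3   @ Tag_ABI_FP_number_model
        .eabi_attribute 24, 1   @ Tag_ABI_align8_needed
        .eabi_attribute 25, 1   @ Tag_ABI_align8_preserved
        .eabi_attribute 26, 2   @ Tag_ABI_enum_size
        .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals
   emitted by the code above.  */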
20199 arm_file_end (void)
20203 if (NEED_INDICATE_EXEC_STACK
)
20204 /* Add .note.GNU-stack. */
20205 file_end_indicate_exec_stack ();
20207 if (! thumb_call_reg_needed
)
20210 switch_to_section (text_section
);
20211 asm_fprintf (asm_out_file
, "\t.code 16\n");
20212 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
20214 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
20216 rtx label
= thumb_call_via_label
[regno
];
20220 targetm
.asm_out
.internal_label (asm_out_file
, "L",
20221 CODE_LABEL_NUMBER (label
));
20222 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
/* Symbols in the text segment can be accessed without indirecting via the
   constant pool; it may take an extra binary operation, but this is still
   faster than indirecting via memory.  Don't do this when not optimizing,
   since we won't be calculating all of the offsets necessary to do this
   optimization.  */
20235 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
20237 if (optimize
> 0 && TREE_CONSTANT (decl
))
20238 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
20240 default_encode_section_info (decl
, rtl
, first
);
20242 #endif /* !ARM_PE */
20245 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
20247 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
20248 && !strcmp (prefix
, "L"))
20250 arm_ccfsm_state
= 0;
20251 arm_target_insn
= NULL
;
20253 default_internal_label (stream
, prefix
, labelno
);
20256 /* Output code to add DELTA to the first argument, and then jump
20257 to FUNCTION. Used for C++ multiple inheritance. */
20259 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
20260 HOST_WIDE_INT delta
,
20261 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
20264 static int thunk_label
= 0;
20267 int mi_delta
= delta
;
20268 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
20270 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
20273 mi_delta
= - mi_delta
;
20277 int labelno
= thunk_label
++;
20278 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
      /* Thunks are entered in ARM mode when available.  */
20280 if (TARGET_THUMB1_ONLY
)
20282 /* push r3 so we can use it as a temporary. */
20283 /* TODO: Omit this save if r3 is not used. */
20284 fputs ("\tpush {r3}\n", file
);
20285 fputs ("\tldr\tr3, ", file
);
20289 fputs ("\tldr\tr12, ", file
);
20291 assemble_name (file
, label
);
20292 fputc ('\n', file
);
20295 /* If we are generating PIC, the ldr instruction below loads
20296 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
20297 the address of the add + 8, so we have:
20299 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
20302 Note that we have "+ 1" because some versions of GNU ld
20303 don't set the low bit of the result for R_ARM_REL32
20304 relocations against thumb function symbols.
20305 On ARMv6M this is +4, not +8. */
20306 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
20307 assemble_name (file
, labelpc
);
20308 fputs (":\n", file
);
20309 if (TARGET_THUMB1_ONLY
)
20311 /* This is 2 insns after the start of the thunk, so we know it
20312 is 4-byte aligned. */
20313 fputs ("\tadd\tr3, pc, r3\n", file
);
20314 fputs ("\tmov r12, r3\n", file
);
20317 fputs ("\tadd\tr12, pc, r12\n", file
);
20319 else if (TARGET_THUMB1_ONLY
)
20320 fputs ("\tmov r12, r3\n", file
);
20322 if (TARGET_THUMB1_ONLY
)
20324 if (mi_delta
> 255)
20326 fputs ("\tldr\tr3, ", file
);
20327 assemble_name (file
, label
);
20328 fputs ("+4\n", file
);
20329 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
20330 mi_op
, this_regno
, this_regno
);
20332 else if (mi_delta
!= 0)
20334 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
20335 mi_op
, this_regno
, this_regno
,
20341 /* TODO: Use movw/movt for large constants when available. */
20342 while (mi_delta
!= 0)
20344 if ((mi_delta
& (3 << shift
)) == 0)
20348 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
20349 mi_op
, this_regno
, this_regno
,
20350 mi_delta
& (0xff << shift
));
20351 mi_delta
&= ~(0xff << shift
);
20358 if (TARGET_THUMB1_ONLY
)
20359 fputs ("\tpop\t{r3}\n", file
);
20361 fprintf (file
, "\tbx\tr12\n");
20362 ASM_OUTPUT_ALIGN (file
, 2);
20363 assemble_name (file
, label
);
20364 fputs (":\n", file
);
20367 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
20368 rtx tem
= XEXP (DECL_RTL (function
), 0);
20369 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
20370 tem
= gen_rtx_MINUS (GET_MODE (tem
),
20372 gen_rtx_SYMBOL_REF (Pmode
,
20373 ggc_strdup (labelpc
)));
20374 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
20377 /* Output ".word .LTHUNKn". */
20378 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
20380 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
20381 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
20385 fputs ("\tb\t", file
);
20386 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
20387 if (NEED_PLT_RELOC
)
20388 fputs ("(PLT)", file
);
20389 fputc ('\n', file
);
20394 arm_emit_vector_const (FILE *file
, rtx x
)
20397 const char * pattern
;
20399 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
20401 switch (GET_MODE (x
))
20403 case V2SImode
: pattern
= "%08x"; break;
20404 case V4HImode
: pattern
= "%04x"; break;
20405 case V8QImode
: pattern
= "%02x"; break;
20406 default: gcc_unreachable ();
20409 fprintf (file
, "0x");
20410 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
20414 element
= CONST_VECTOR_ELT (x
, i
);
20415 fprintf (file
, pattern
, INTVAL (element
));
20421 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
20422 HFmode constant pool entries are actually loaded with ldr. */
20424 arm_emit_fp16_const (rtx c
)
20429 REAL_VALUE_FROM_CONST_DOUBLE (r
, c
);
20430 bits
= real_to_target (NULL
, &r
, HFmode
);
20431 if (WORDS_BIG_ENDIAN
)
20432 assemble_zeros (2);
20433 assemble_integer (GEN_INT (bits
), 2, BITS_PER_WORD
, 1);
20434 if (!WORDS_BIG_ENDIAN
)
20435 assemble_zeros (2);
20439 arm_output_load_gr (rtx
*operands
)
20446 if (GET_CODE (operands
[1]) != MEM
20447 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
20448 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
20449 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
20450 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
20451 return "wldrw%?\t%0, %1";
20453 /* Fix up an out-of-range load of a GR register. */
20454 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
20455 wcgr
= operands
[0];
20457 output_asm_insn ("ldr%?\t%0, %1", operands
);
20459 operands
[0] = wcgr
;
20461 output_asm_insn ("tmcr%?\t%0, %1", operands
);
20462 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
20467 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
20469 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
20470 named arg and all anonymous args onto the stack.
20471 XXX I know the prologue shouldn't be pushing registers, but it is faster
20475 arm_setup_incoming_varargs (CUMULATIVE_ARGS
*pcum
,
20476 enum machine_mode mode
,
20479 int second_time ATTRIBUTE_UNUSED
)
20483 cfun
->machine
->uses_anonymous_args
= 1;
20484 if (pcum
->pcs_variant
<= ARM_PCS_AAPCS_LOCAL
)
20486 nregs
= pcum
->aapcs_ncrn
;
20487 if ((nregs
& 1) && arm_needs_doubleword_align (mode
, type
))
20491 nregs
= pcum
->nregs
;
20493 if (nregs
< NUM_ARG_REGS
)
20494 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
20497 /* Return nonzero if the CONSUMER instruction (a store) does not need
20498 PRODUCER's value to calculate the address. */
20501 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
20503 rtx value
= PATTERN (producer
);
20504 rtx addr
= PATTERN (consumer
);
20506 if (GET_CODE (value
) == COND_EXEC
)
20507 value
= COND_EXEC_CODE (value
);
20508 if (GET_CODE (value
) == PARALLEL
)
20509 value
= XVECEXP (value
, 0, 0);
20510 value
= XEXP (value
, 0);
20511 if (GET_CODE (addr
) == COND_EXEC
)
20512 addr
= COND_EXEC_CODE (addr
);
20513 if (GET_CODE (addr
) == PARALLEL
)
20514 addr
= XVECEXP (addr
, 0, 0);
20515 addr
= XEXP (addr
, 0);
20517 return !reg_overlap_mentioned_p (value
, addr
);
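/* Illustrative example (not from the original source): with producer
   "add r1, r2, #4" and consumer "str r0, [r1]" the produced value r1 feeds
   the store address, so this returns 0; with consumer "str r1, [r2]" the
   produced value is only the data being stored, so this returns nonzero.  */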
20520 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20521 have an early register shift value or amount dependency on the
20522 result of PRODUCER. */
20525 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
20527 rtx value
= PATTERN (producer
);
20528 rtx op
= PATTERN (consumer
);
20531 if (GET_CODE (value
) == COND_EXEC
)
20532 value
= COND_EXEC_CODE (value
);
20533 if (GET_CODE (value
) == PARALLEL
)
20534 value
= XVECEXP (value
, 0, 0);
20535 value
= XEXP (value
, 0);
20536 if (GET_CODE (op
) == COND_EXEC
)
20537 op
= COND_EXEC_CODE (op
);
20538 if (GET_CODE (op
) == PARALLEL
)
20539 op
= XVECEXP (op
, 0, 0);
20542 early_op
= XEXP (op
, 0);
20543 /* This is either an actual independent shift, or a shift applied to
20544 the first operand of another operation. We want the whole shift
20546 if (GET_CODE (early_op
) == REG
)
20549 return !reg_overlap_mentioned_p (value
, early_op
);
20552 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20553 have an early register shift value dependency on the result of
20557 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
20559 rtx value
= PATTERN (producer
);
20560 rtx op
= PATTERN (consumer
);
20563 if (GET_CODE (value
) == COND_EXEC
)
20564 value
= COND_EXEC_CODE (value
);
20565 if (GET_CODE (value
) == PARALLEL
)
20566 value
= XVECEXP (value
, 0, 0);
20567 value
= XEXP (value
, 0);
20568 if (GET_CODE (op
) == COND_EXEC
)
20569 op
= COND_EXEC_CODE (op
);
20570 if (GET_CODE (op
) == PARALLEL
)
20571 op
= XVECEXP (op
, 0, 0);
20574 early_op
= XEXP (op
, 0);
20576 /* This is either an actual independent shift, or a shift applied to
20577 the first operand of another operation. We want the value being
20578 shifted, in either case. */
20579 if (GET_CODE (early_op
) != REG
)
20580 early_op
= XEXP (early_op
, 0);
20582 return !reg_overlap_mentioned_p (value
, early_op
);
20585 /* Return nonzero if the CONSUMER (a mul or mac op) does not
20586 have an early register mult dependency on the result of
20590 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
20592 rtx value
= PATTERN (producer
);
20593 rtx op
= PATTERN (consumer
);
20595 if (GET_CODE (value
) == COND_EXEC
)
20596 value
= COND_EXEC_CODE (value
);
20597 if (GET_CODE (value
) == PARALLEL
)
20598 value
= XVECEXP (value
, 0, 0);
20599 value
= XEXP (value
, 0);
20600 if (GET_CODE (op
) == COND_EXEC
)
20601 op
= COND_EXEC_CODE (op
);
20602 if (GET_CODE (op
) == PARALLEL
)
20603 op
= XVECEXP (op
, 0, 0);
20606 if (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == MINUS
)
20608 if (GET_CODE (XEXP (op
, 0)) == MULT
)
20609 return !reg_overlap_mentioned_p (value
, XEXP (op
, 0));
20611 return !reg_overlap_mentioned_p (value
, XEXP (op
, 1));
/* We can't rely on the caller doing the proper promotion when
   using APCS or ATPCS.  */
static bool
arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
{
  return !TARGET_AAPCS_BASED;
}

static enum machine_mode
arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
                           enum machine_mode mode,
                           int *punsignedp ATTRIBUTE_UNUSED,
                           const_tree fntype ATTRIBUTE_UNUSED,
                           int for_return ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) < 4)
    return SImode;

  return mode;
}

/* AAPCS based ABIs use short enums by default.  */
static bool
arm_default_short_enums (void)
{
  return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
}
20649 /* AAPCS requires that anonymous bitfields affect structure alignment. */
20652 arm_align_anon_bitfield (void)
20654 return TARGET_AAPCS_BASED
;
20658 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
20661 arm_cxx_guard_type (void)
20663 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
20666 /* Return non-zero if the consumer (a multiply-accumulate instruction)
20667 has an accumulator dependency on the result of the producer (a
20668 multiplication instruction) and no other dependency on that result. */
20670 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
20672 rtx mul
= PATTERN (producer
);
20673 rtx mac
= PATTERN (consumer
);
20675 rtx mac_op0
, mac_op1
, mac_acc
;
20677 if (GET_CODE (mul
) == COND_EXEC
)
20678 mul
= COND_EXEC_CODE (mul
);
20679 if (GET_CODE (mac
) == COND_EXEC
)
20680 mac
= COND_EXEC_CODE (mac
);
20682 /* Check that mul is of the form (set (...) (mult ...))
20683 and mla is of the form (set (...) (plus (mult ...) (...))). */
20684 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
20685 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
20686 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
20689 mul_result
= XEXP (mul
, 0);
20690 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
20691 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
20692 mac_acc
= XEXP (XEXP (mac
, 1), 1);
20694 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
20695 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
20696 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
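/* Illustrative example (not from the original source): producer
   "mul r1, r2, r3", i.e. (set (reg r1) (mult ...)), feeding consumer
   "mla r4, r5, r6, r1", i.e. (set (reg r4) (plus (mult r5 r6) (reg r1))),
   satisfies this test: r1 is the accumulator operand and is not one of the
   multiply operands, so the function returns nonzero.  */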
20700 /* The EABI says test the least significant bit of a guard variable. */
20703 arm_cxx_guard_mask_bit (void)
20705 return TARGET_AAPCS_BASED
;
20709 /* The EABI specifies that all array cookies are 8 bytes long. */
20712 arm_get_cookie_size (tree type
)
20716 if (!TARGET_AAPCS_BASED
)
20717 return default_cxx_get_cookie_size (type
);
20719 size
= build_int_cst (sizetype
, 8);
20724 /* The EABI says that array cookies should also contain the element size. */
20727 arm_cookie_has_size (void)
20729 return TARGET_AAPCS_BASED
;
20733 /* The EABI says constructors and destructors should return a pointer to
20734 the object constructed/destroyed. */
20737 arm_cxx_cdtor_returns_this (void)
20739 return TARGET_AAPCS_BASED
;
20742 /* The EABI says that an inline function may never be the key
20746 arm_cxx_key_method_may_be_inline (void)
20748 return !TARGET_AAPCS_BASED
;
20752 arm_cxx_determine_class_data_visibility (tree decl
)
20754 if (!TARGET_AAPCS_BASED
20755 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
20758 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
20759 is exported. However, on systems without dynamic vague linkage,
20760 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
20761 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
20762 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
20764 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
20765 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
20769 arm_cxx_class_data_always_comdat (void)
20771 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
20772 vague linkage if the class has no key function. */
20773 return !TARGET_AAPCS_BASED
;
20777 /* The EABI says __aeabi_atexit should be used to register static
20781 arm_cxx_use_aeabi_atexit (void)
20783 return TARGET_AAPCS_BASED
;
20788 arm_set_return_address (rtx source
, rtx scratch
)
20790 arm_stack_offsets
*offsets
;
20791 HOST_WIDE_INT delta
;
20793 unsigned long saved_regs
;
20795 offsets
= arm_get_frame_offsets ();
20796 saved_regs
= offsets
->saved_regs_mask
;
20798 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
20799 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
20802 if (frame_pointer_needed
)
20803 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
20806 /* LR will be the first saved register. */
20807 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
20812 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
20813 GEN_INT (delta
& ~4095)));
20818 addr
= stack_pointer_rtx
;
20820 addr
= plus_constant (addr
, delta
);
20822 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
20828 thumb_set_return_address (rtx source
, rtx scratch
)
20830 arm_stack_offsets
*offsets
;
20831 HOST_WIDE_INT delta
;
20832 HOST_WIDE_INT limit
;
20835 unsigned long mask
;
20839 offsets
= arm_get_frame_offsets ();
20840 mask
= offsets
->saved_regs_mask
;
20841 if (mask
& (1 << LR_REGNUM
))
20844 /* Find the saved regs. */
20845 if (frame_pointer_needed
)
20847 delta
= offsets
->soft_frame
- offsets
->saved_args
;
20848 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
20854 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
20857 /* Allow for the stack frame. */
20858 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
20860 /* The link register is always the first saved register. */
20863 /* Construct the address. */
20864 addr
= gen_rtx_REG (SImode
, reg
);
20867 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
20868 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
20872 addr
= plus_constant (addr
, delta
);
20874 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
20877 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
/* Implements target hook vector_mode_supported_p.  */
static bool
arm_vector_mode_supported_p (enum machine_mode mode)
{
  /* Neon also supports V2SImode, etc. listed in the clause below.  */
  if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
      || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
    return true;

  if ((TARGET_NEON || TARGET_IWMMXT)
      && ((mode == V2SImode)
          || (mode == V4HImode)
          || (mode == V8QImode)))
    return true;

  return false;
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  SImode shifts use normal
   ARM insns and therefore guarantee that the shift count is modulo 256.
   DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
   guarantee no particular behavior for out-of-range counts.  */
static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode mode)
{
  return mode == SImode ? 255 : 0;
}
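/* Illustrative note (not from the original source): returning 255 for SImode
   lets the middle-end drop an explicit masking of the shift count, e.g. fold
   (x << (n & 255)) into (x << n), because the hardware already ignores all
   but the low byte of the count; returning 0 for DImode forbids that
   assumption.  */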
/* Map internal gcc register numbers to DWARF2 register numbers.  */
unsigned int
arm_dbx_register_number (unsigned int regno)
{
  if (regno < 16)
    return regno;

  /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
     compatibility.  The EABI defines them as registers 96-103.  */
  if (IS_FPA_REGNUM (regno))
    return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;

  if (IS_VFP_REGNUM (regno))
    {
      /* See comment in arm_dwarf_register_span.  */
      if (VFP_REGNO_OK_FOR_SINGLE (regno))
        return 64 + regno - FIRST_VFP_REGNUM;
      else
        return 256 + (regno - FIRST_VFP_REGNUM) / 2;
    }

  if (IS_IWMMXT_GR_REGNUM (regno))
    return 104 + regno - FIRST_IWMMXT_GR_REGNUM;

  if (IS_IWMMXT_REGNUM (regno))
    return 112 + regno - FIRST_IWMMXT_REGNUM;

  gcc_unreachable ();
}
/* Dwarf models VFPv3 registers as 32 64-bit registers.
   GCC models them as 64 32-bit registers, so we need to describe this to
   the DWARF generation code.  Other registers can use the default.  */
20945 arm_dwarf_register_span (rtx rtl
)
20952 regno
= REGNO (rtl
);
20953 if (!IS_VFP_REGNUM (regno
))
20956 /* XXX FIXME: The EABI defines two VFP register ranges:
20957 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
20959 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
20960 corresponding D register. Until GDB supports this, we shall use the
20961 legacy encodings. We also use these encodings for D0-D15 for
20962 compatibility with older debuggers. */
20963 if (VFP_REGNO_OK_FOR_SINGLE (regno
))
20966 nregs
= GET_MODE_SIZE (GET_MODE (rtl
)) / 8;
20967 p
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (nregs
));
20968 regno
= (regno
- FIRST_VFP_REGNUM
) / 2;
20969 for (i
= 0; i
< nregs
; i
++)
20970 XVECEXP (p
, 0, i
) = gen_rtx_REG (DImode
, 256 + regno
+ i
);
20975 #ifdef TARGET_UNWIND_INFO
20976 /* Emit unwind directives for a store-multiple instruction or stack pointer
20977 push during alignment.
20978 These should only ever be generated by the function prologue code, so
20979 expect them to have a particular form. */
20982 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
20985 HOST_WIDE_INT offset
;
20986 HOST_WIDE_INT nregs
;
20992 e
= XVECEXP (p
, 0, 0);
20993 if (GET_CODE (e
) != SET
)
20996 /* First insn will adjust the stack pointer. */
20997 if (GET_CODE (e
) != SET
20998 || GET_CODE (XEXP (e
, 0)) != REG
20999 || REGNO (XEXP (e
, 0)) != SP_REGNUM
21000 || GET_CODE (XEXP (e
, 1)) != PLUS
)
21003 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
21004 nregs
= XVECLEN (p
, 0) - 1;
21006 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
21009 /* The function prologue may also push pc, but not annotate it as it is
21010 never restored. We turn this into a stack pointer adjustment. */
21011 if (nregs
* 4 == offset
- 4)
21013 fprintf (asm_out_file
, "\t.pad #4\n");
21017 fprintf (asm_out_file
, "\t.save {");
21019 else if (IS_VFP_REGNUM (reg
))
21022 fprintf (asm_out_file
, "\t.vsave {");
21024 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
21026 /* FPA registers are done differently. */
21027 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
21031 /* Unknown register type. */
21034 /* If the stack increment doesn't match the size of the saved registers,
21035 something has gone horribly wrong. */
21036 if (offset
!= nregs
* reg_size
)
21041 /* The remaining insns will describe the stores. */
21042 for (i
= 1; i
<= nregs
; i
++)
21044 /* Expect (set (mem <addr>) (reg)).
21045 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
21046 e
= XVECEXP (p
, 0, i
);
21047 if (GET_CODE (e
) != SET
21048 || GET_CODE (XEXP (e
, 0)) != MEM
21049 || GET_CODE (XEXP (e
, 1)) != REG
)
21052 reg
= REGNO (XEXP (e
, 1));
21057 fprintf (asm_out_file
, ", ");
21058 /* We can't use %r for vfp because we need to use the
21059 double precision register names. */
21060 if (IS_VFP_REGNUM (reg
))
21061 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
21063 asm_fprintf (asm_out_file
, "%r", reg
);
21065 #ifdef ENABLE_CHECKING
21066 /* Check that the addresses are consecutive. */
21067 e
= XEXP (XEXP (e
, 0), 0);
21068 if (GET_CODE (e
) == PLUS
)
21070 offset
+= reg_size
;
21071 if (GET_CODE (XEXP (e
, 0)) != REG
21072 || REGNO (XEXP (e
, 0)) != SP_REGNUM
21073 || GET_CODE (XEXP (e
, 1)) != CONST_INT
21074 || offset
!= INTVAL (XEXP (e
, 1)))
21078 || GET_CODE (e
) != REG
21079 || REGNO (e
) != SP_REGNUM
)
21083 fprintf (asm_out_file
, "}\n");
21086 /* Emit unwind directives for a SET. */
21089 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
21097 switch (GET_CODE (e0
))
21100 /* Pushing a single register. */
21101 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
21102 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
21103 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
21106 asm_fprintf (asm_out_file
, "\t.save ");
21107 if (IS_VFP_REGNUM (REGNO (e1
)))
21108 asm_fprintf(asm_out_file
, "{d%d}\n",
21109 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
21111 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
21115 if (REGNO (e0
) == SP_REGNUM
)
21117 /* A stack increment. */
21118 if (GET_CODE (e1
) != PLUS
21119 || GET_CODE (XEXP (e1
, 0)) != REG
21120 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
21121 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
21124 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
21125 -INTVAL (XEXP (e1
, 1)));
21127 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
21129 HOST_WIDE_INT offset
;
21131 if (GET_CODE (e1
) == PLUS
)
21133 if (GET_CODE (XEXP (e1
, 0)) != REG
21134 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
21136 reg
= REGNO (XEXP (e1
, 0));
21137 offset
= INTVAL (XEXP (e1
, 1));
21138 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
21139 HARD_FRAME_POINTER_REGNUM
, reg
,
21142 else if (GET_CODE (e1
) == REG
)
21145 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
21146 HARD_FRAME_POINTER_REGNUM
, reg
);
21151 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
21153 /* Move from sp to reg. */
21154 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
21156 else if (GET_CODE (e1
) == PLUS
21157 && GET_CODE (XEXP (e1
, 0)) == REG
21158 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
21159 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
21161 /* Set reg to offset from sp. */
21162 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
21163 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
21165 else if (GET_CODE (e1
) == UNSPEC
&& XINT (e1
, 1) == UNSPEC_STACK_ALIGN
)
21167 /* Stack pointer save before alignment. */
21169 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
21182 /* Emit unwind directives for the given insn. */
21185 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
21189 if (!ARM_EABI_UNWIND_TABLES
)
21192 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
21193 && (TREE_NOTHROW (current_function_decl
)
21194 || crtl
->all_throwers_are_sibcalls
))
21197 if (GET_CODE (insn
) == NOTE
|| !RTX_FRAME_RELATED_P (insn
))
21200 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
21202 pat
= XEXP (pat
, 0);
21204 pat
= PATTERN (insn
);
21206 switch (GET_CODE (pat
))
21209 arm_unwind_emit_set (asm_out_file
, pat
);
21213 /* Store multiple. */
21214 arm_unwind_emit_sequence (asm_out_file
, pat
);
/* Output a reference from a function exception table to the type_info
   object X.  The EABI specifies that the symbol should be relocated by
   an R_ARM_TARGET2 relocation.  */
static bool
arm_output_ttype (rtx x)
{
  fputs ("\t.word\t", asm_out_file);
  output_addr_const (asm_out_file, x);
  /* Use special relocations for symbol references.  */
  if (GET_CODE (x) != CONST_INT)
    fputs ("(TARGET2)", asm_out_file);
  fputc ('\n', asm_out_file);

  return TRUE;
}
21239 #endif /* TARGET_UNWIND_INFO */
21242 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21243 stack alignment. */
21246 arm_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
21248 rtx unspec
= SET_SRC (pattern
);
21249 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
21253 case UNSPEC_STACK_ALIGN
:
21254 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
21255 put anything on the stack, so hopefully it won't matter.
21256 CFA = SP will be correct after alignment. */
21257 dwarf2out_reg_save_reg (label
, stack_pointer_rtx
,
21258 SET_DEST (pattern
));
21261 gcc_unreachable ();
/* Output unwind directives for the start/end of a function.  */
void
arm_output_fn_unwind (FILE * f, bool prologue)
{
  if (!ARM_EABI_UNWIND_TABLES)
    return;

  if (prologue)
    fputs ("\t.fnstart\n", f);
  else
    {
      /* If this function will never be unwound, then mark it as such.
         The same condition is used in arm_unwind_emit to suppress
         the frame annotations.  */
      if (!(flag_unwind_tables || crtl->uses_eh_lsda)
          && (TREE_NOTHROW (current_function_decl)
              || crtl->all_throwers_are_sibcalls))
        fputs ("\t.cantunwind\n", f);

      fputs ("\t.fnend\n", f);
    }
}
21291 arm_emit_tls_decoration (FILE *fp
, rtx x
)
21293 enum tls_reloc reloc
;
21296 val
= XVECEXP (x
, 0, 0);
21297 reloc
= (enum tls_reloc
) INTVAL (XVECEXP (x
, 0, 1));
21299 output_addr_const (fp
, val
);
21304 fputs ("(tlsgd)", fp
);
21307 fputs ("(tlsldm)", fp
);
21310 fputs ("(tlsldo)", fp
);
21313 fputs ("(gottpoff)", fp
);
21316 fputs ("(tpoff)", fp
);
21319 gcc_unreachable ();
21327 fputs (" + (. - ", fp
);
21328 output_addr_const (fp
, XVECEXP (x
, 0, 2));
21330 output_addr_const (fp
, XVECEXP (x
, 0, 3));
/* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL.  */
static void
arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  gcc_assert (size == 4);
  fputs ("\t.word\t", file);
  output_addr_const (file, x);
  fputs ("(tlsldo)", file);
}
21352 arm_output_addr_const_extra (FILE *fp
, rtx x
)
21354 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
21355 return arm_emit_tls_decoration (fp
, x
);
21356 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
21359 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
21361 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
21362 assemble_name_raw (fp
, label
);
21366 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_GOTSYM_OFF
)
21368 assemble_name (fp
, "_GLOBAL_OFFSET_TABLE_");
21372 output_addr_const (fp
, XVECEXP (x
, 0, 0));
21376 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_SYMBOL_OFFSET
)
21378 output_addr_const (fp
, XVECEXP (x
, 0, 0));
21382 output_addr_const (fp
, XVECEXP (x
, 0, 1));
21386 else if (GET_CODE (x
) == CONST_VECTOR
)
21387 return arm_emit_vector_const (fp
, x
);
21392 /* Output assembly for a shift instruction.
21393 SET_FLAGS determines how the instruction modifies the condition codes.
21394 0 - Do not set condition codes.
21395 1 - Set condition codes.
21396 2 - Use smallest instruction. */
21398 arm_output_shift(rtx
* operands
, int set_flags
)
21401 static const char flag_chars
[3] = {'?', '.', '!'};
21406 c
= flag_chars
[set_flags
];
21407 if (TARGET_UNIFIED_ASM
)
21409 shift
= shift_op(operands
[3], &val
);
21413 operands
[2] = GEN_INT(val
);
21414 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
21417 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
21420 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
21421 output_asm_insn (pattern
, operands
);
21425 /* Output a Thumb-1 casesi dispatch sequence. */
21427 thumb1_output_casesi (rtx
*operands
)
21429 rtx diff_vec
= PATTERN (next_real_insn (operands
[0]));
21431 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
21433 switch (GET_MODE(diff_vec
))
21436 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
21437 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
21439 return (ADDR_DIFF_VEC_FLAGS (diff_vec
).offset_unsigned
?
21440 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
21442 return "bl\t%___gnu_thumb1_case_si";
21444 gcc_unreachable ();
21448 /* Output a Thumb-2 casesi instruction. */
21450 thumb2_output_casesi (rtx
*operands
)
21452 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
21454 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
21456 output_asm_insn ("cmp\t%0, %1", operands
);
21457 output_asm_insn ("bhi\t%l3", operands
);
21458 switch (GET_MODE(diff_vec
))
21461 return "tbb\t[%|pc, %0]";
21463 return "tbh\t[%|pc, %0, lsl #1]";
21467 output_asm_insn ("adr\t%4, %l2", operands
);
21468 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
21469 output_asm_insn ("add\t%4, %4, %5", operands
);
21474 output_asm_insn ("adr\t%4, %l2", operands
);
21475 return "ldr\t%|pc, [%4, %0, lsl #2]";
21478 gcc_unreachable ();
21482 /* Most ARM cores are single issue, but some newer ones can dual issue.
21483 The scheduler descriptions rely on this being correct. */
21485 arm_issue_rate (void)
21500 /* A table and a function to perform ARM-specific name mangling for
21501 NEON vector types in order to conform to the AAPCS (see "Procedure
21502 Call Standard for the ARM Architecture", Appendix A). To qualify
21503 for emission with the mangled names defined in that document, a
21504 vector type must not only be of the correct mode but also be
21505 composed of NEON vector element types (e.g. __builtin_neon_qi). */
21508 enum machine_mode mode
;
21509 const char *element_type_name
;
21510 const char *aapcs_name
;
21511 } arm_mangle_map_entry
;
21513 static arm_mangle_map_entry arm_mangle_map
[] = {
21514 /* 64-bit containerized types. */
21515 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
21516 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
21517 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
21518 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
21519 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
21520 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
21521 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
21522 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
21523 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
21524 /* 128-bit containerized types. */
21525 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
21526 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
21527 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
21528 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
21529 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
21530 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
21531 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
21532 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
21533 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
21534 { VOIDmode
, NULL
, NULL
}
21538 arm_mangle_type (const_tree type
)
21540 arm_mangle_map_entry
*pos
= arm_mangle_map
;
21542 /* The ARM ABI documents (10th October 2008) say that "__va_list"
     has to be mangled as if it is in the "std" namespace.  */
21544 if (TARGET_AAPCS_BASED
21545 && lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
21547 static bool warned
;
21548 if (!warned
&& warn_psabi
&& !in_system_header
)
21551 inform (input_location
,
21552 "the mangling of %<va_list%> has changed in GCC 4.4");
21554 return "St9__va_list";
21557 /* Half-precision float. */
21558 if (TREE_CODE (type
) == REAL_TYPE
&& TYPE_PRECISION (type
) == 16)
21561 if (TREE_CODE (type
) != VECTOR_TYPE
)
21564 /* Check the mode of the vector type, and the name of the vector
21565 element type, against the table. */
21566 while (pos
->mode
!= VOIDmode
)
21568 tree elt_type
= TREE_TYPE (type
);
21570 if (pos
->mode
== TYPE_MODE (type
)
21571 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
21572 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
21573 pos
->element_type_name
))
21574 return pos
->aapcs_name
;
  /* Use the default mangling for unrecognized (possibly user-defined)
     vector types.  */
  return NULL;
}
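/* Illustrative example (not from the original source): under the AAPCS a
   parameter of type int8x8_t (V8QImode of __builtin_neon_qi elements) uses
   the "15__simd64_int8_t" entry above, so a C++ function such as
   void f (int8x8_t) mangles as _Z1f15__simd64_int8_t rather than using the
   generic vector mangling.  */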
/* Order of allocation of core registers for Thumb: this allocation is
   written over the corresponding initial entries of the array
   initialized with REG_ALLOC_ORDER.  We allocate all low registers
   first.  Saving and restoring a low register is usually cheaper than
   using a call-clobbered high register.  */
static const int thumb_core_reg_alloc_order[] =
{
   3,  2,  1,  0,  4,  5,  6,  7,
  14, 12,  8,  9, 10, 11, 13, 15
};

/* Adjust register allocation order when compiling for Thumb.  */
void
arm_order_regs_for_local_alloc (void)
{
  const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
  memcpy (reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));

  if (TARGET_THUMB)
    memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
            sizeof (thumb_core_reg_alloc_order));
}
/* Set default optimization options.  */
void
arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
  /* Enable section anchors by default at -O1 or higher.
     Use 2 to distinguish from an explicit -fsection-anchors
     given on the command line.  */
  if (level > 0)
    flag_section_anchors = 2;
}
/* Implement TARGET_FRAME_POINTER_REQUIRED.  */
static bool
arm_frame_pointer_required (void)
{
  return (cfun->has_nonlocal_label
          || SUBTARGET_FRAME_POINTER_REQUIRED
          || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
}

/* Only thumb1 can't support conditional execution, so return true if
   the target is not thumb1.  */
static bool
arm_have_conditional_execution (void)
{
  return !TARGET_THUMB1;
}

#include "gt-arm.h"