1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
26 #include "coretypes.h"
32 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
37 #include "insn-attr.h"
48 #include "integrate.h"
51 #include "target-def.h"
53 #include "langhooks.h"
56 /* Forward definitions of types. */
/* Opaque node and fixup records for the minipool (constant-pool placement)
   machinery; the struct bodies appear later in this file.  */
57 typedef struct minipool_node Mnode
;
58 typedef struct minipool_fixup Mfix
;
/* Machine-attribute table for this target; tentative declaration here,
   presumably defined further down in this file — TODO confirm.  */
60 const struct attribute_spec arm_attribute_table
[];
/* Hook a language front end can install to emit extra EABI object
   attributes; NOTE(review): callers/installers are outside this view.  */
62 void (*arm_lang_output_object_attributes_hook
)(void);
64 /* Forward function declarations. */
65 static arm_stack_offsets
*arm_get_frame_offsets (void);
66 static void arm_add_gc_roots (void);
67 static int arm_gen_constant (enum rtx_code
, enum machine_mode
, rtx
,
68 HOST_WIDE_INT
, rtx
, rtx
, int, int);
69 static unsigned bit_count (unsigned long);
70 static int arm_address_register_rtx_p (rtx
, int);
71 static int arm_legitimate_index_p (enum machine_mode
, rtx
, RTX_CODE
, int);
72 static int thumb2_legitimate_index_p (enum machine_mode
, rtx
, int);
73 static int thumb1_base_register_rtx_p (rtx
, enum machine_mode
, int);
74 inline static int thumb1_index_register_rtx_p (rtx
, int);
75 static int thumb_far_jump_used_p (void);
76 static bool thumb_force_lr_save (void);
77 static int const_ok_for_op (HOST_WIDE_INT
, enum rtx_code
);
78 static rtx
emit_sfm (int, int);
79 static unsigned arm_size_return_regs (void);
80 static bool arm_assemble_integer (rtx
, unsigned int, int);
81 static const char *fp_const_from_val (REAL_VALUE_TYPE
*);
82 static arm_cc
get_arm_condition_code (rtx
);
83 static HOST_WIDE_INT
int_log2 (HOST_WIDE_INT
);
84 static rtx
is_jump_table (rtx
);
85 static const char *output_multi_immediate (rtx
*, const char *, const char *,
87 static const char *shift_op (rtx
, HOST_WIDE_INT
*);
88 static struct machine_function
*arm_init_machine_status (void);
89 static void thumb_exit (FILE *, int);
90 static rtx
is_jump_table (rtx
);
91 static HOST_WIDE_INT
get_jump_table_size (rtx
);
92 static Mnode
*move_minipool_fix_forward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
93 static Mnode
*add_minipool_forward_ref (Mfix
*);
94 static Mnode
*move_minipool_fix_backward_ref (Mnode
*, Mnode
*, HOST_WIDE_INT
);
95 static Mnode
*add_minipool_backward_ref (Mfix
*);
96 static void assign_minipool_offsets (Mfix
*);
97 static void arm_print_value (FILE *, rtx
);
98 static void dump_minipool (rtx
);
99 static int arm_barrier_cost (rtx
);
100 static Mfix
*create_fix_barrier (Mfix
*, HOST_WIDE_INT
);
101 static void push_minipool_barrier (rtx
, HOST_WIDE_INT
);
102 static void push_minipool_fix (rtx
, HOST_WIDE_INT
, rtx
*, enum machine_mode
,
104 static void arm_reorg (void);
105 static bool note_invalid_constants (rtx
, HOST_WIDE_INT
, int);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree
);
109 static unsigned long arm_compute_func_type (void);
110 static tree
arm_handle_fndecl_attribute (tree
*, tree
, tree
, int, bool *);
111 static tree
arm_handle_isr_attribute (tree
*, tree
, tree
, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree
arm_handle_notshared_attribute (tree
*, tree
, tree
, int, bool *);
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT
);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT
);
117 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT
);
118 static int arm_comp_type_attributes (const_tree
, const_tree
);
119 static void arm_set_default_type_attributes (tree
);
120 static int arm_adjust_cost (rtx
, rtx
, rtx
, int);
121 static int count_insns_for_constant (HOST_WIDE_INT
, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree
, tree
);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree
, HOST_WIDE_INT
, HOST_WIDE_INT
,
127 static int arm_rtx_costs_1 (rtx
, enum rtx_code
, enum rtx_code
);
128 static bool arm_size_rtx_costs (rtx
, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx
, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx
, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx
, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx
, int, int, int *);
133 static int arm_address_cost (rtx
);
134 static bool arm_memory_load_p (rtx
);
135 static bool arm_cirrus_insn_p (rtx
);
136 static void cirrus_reorg (rtx
);
137 static void arm_init_builtins (void);
138 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx
safe_vector_operand (rtx
, enum machine_mode
);
141 static rtx
arm_expand_binop_builtin (enum insn_code
, tree
, rtx
);
142 static rtx
arm_expand_unop_builtin (enum insn_code
, tree
, rtx
, int);
143 static rtx
arm_expand_builtin (tree
, rtx
, rtx
, enum machine_mode
, int);
144 static void emit_constant_insn (rtx cond
, rtx pattern
);
145 static rtx
emit_set_insn (rtx
, rtx
);
146 static int arm_arg_partial_bytes (CUMULATIVE_ARGS
*, enum machine_mode
,
149 #ifdef OBJECT_FORMAT_ELF
150 static void arm_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
151 static void arm_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
154 static void arm_encode_section_info (tree
, rtx
, int);
157 static void arm_file_end (void);
158 static void arm_file_start (void);
160 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS
*, enum machine_mode
,
162 static bool arm_pass_by_reference (CUMULATIVE_ARGS
*,
163 enum machine_mode
, const_tree
, bool);
164 static bool arm_promote_prototypes (const_tree
);
165 static bool arm_default_short_enums (void);
166 static bool arm_align_anon_bitfield (void);
167 static bool arm_return_in_msb (const_tree
);
168 static bool arm_must_pass_in_stack (enum machine_mode
, const_tree
);
169 static bool arm_return_in_memory (const_tree
, const_tree
);
170 #ifdef TARGET_UNWIND_INFO
171 static void arm_unwind_emit (FILE *, rtx
);
172 static bool arm_output_ttype (rtx
);
174 static void arm_dwarf_handle_frame_unspec (const char *, rtx
, int);
176 static tree
arm_cxx_guard_type (void);
177 static bool arm_cxx_guard_mask_bit (void);
178 static tree
arm_get_cookie_size (tree
);
179 static bool arm_cookie_has_size (void);
180 static bool arm_cxx_cdtor_returns_this (void);
181 static bool arm_cxx_key_method_may_be_inline (void);
182 static void arm_cxx_determine_class_data_visibility (tree
);
183 static bool arm_cxx_class_data_always_comdat (void);
184 static bool arm_cxx_use_aeabi_atexit (void);
185 static void arm_init_libfuncs (void);
186 static bool arm_handle_option (size_t, const char *, int);
187 static void arm_target_help (void);
188 static unsigned HOST_WIDE_INT
arm_shift_truncation_mask (enum machine_mode
);
189 static bool arm_cannot_copy_insn_p (rtx
);
190 static bool arm_tls_symbol_p (rtx x
);
191 static int arm_issue_rate (void);
192 static void arm_output_dwarf_dtprel (FILE *, int, rtx
) ATTRIBUTE_UNUSED
;
193 static bool arm_allocate_stack_slots_for_args (void);
196 /* Initialize the GCC target structure. */
197 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
198 #undef TARGET_MERGE_DECL_ATTRIBUTES
199 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
202 #undef TARGET_ATTRIBUTE_TABLE
203 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
205 #undef TARGET_ASM_FILE_START
206 #define TARGET_ASM_FILE_START arm_file_start
207 #undef TARGET_ASM_FILE_END
208 #define TARGET_ASM_FILE_END arm_file_end
210 #undef TARGET_ASM_ALIGNED_SI_OP
211 #define TARGET_ASM_ALIGNED_SI_OP NULL
212 #undef TARGET_ASM_INTEGER
213 #define TARGET_ASM_INTEGER arm_assemble_integer
215 #undef TARGET_ASM_FUNCTION_PROLOGUE
216 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
218 #undef TARGET_ASM_FUNCTION_EPILOGUE
219 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
221 #undef TARGET_DEFAULT_TARGET_FLAGS
222 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
223 #undef TARGET_HANDLE_OPTION
224 #define TARGET_HANDLE_OPTION arm_handle_option
226 #define TARGET_HELP arm_target_help
228 #undef TARGET_COMP_TYPE_ATTRIBUTES
229 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
231 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
232 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
234 #undef TARGET_SCHED_ADJUST_COST
235 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
237 #undef TARGET_ENCODE_SECTION_INFO
239 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
241 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
244 #undef TARGET_STRIP_NAME_ENCODING
245 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
247 #undef TARGET_ASM_INTERNAL_LABEL
248 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
250 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
251 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
253 #undef TARGET_ASM_OUTPUT_MI_THUNK
254 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
255 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
256 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
258 /* This will be overridden in arm_override_options. */
259 #undef TARGET_RTX_COSTS
260 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
261 #undef TARGET_ADDRESS_COST
262 #define TARGET_ADDRESS_COST arm_address_cost
264 #undef TARGET_SHIFT_TRUNCATION_MASK
265 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
266 #undef TARGET_VECTOR_MODE_SUPPORTED_P
267 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
269 #undef TARGET_MACHINE_DEPENDENT_REORG
270 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
272 #undef TARGET_INIT_BUILTINS
273 #define TARGET_INIT_BUILTINS arm_init_builtins
274 #undef TARGET_EXPAND_BUILTIN
275 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
277 #undef TARGET_INIT_LIBFUNCS
278 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
280 #undef TARGET_PROMOTE_FUNCTION_ARGS
281 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
282 #undef TARGET_PROMOTE_FUNCTION_RETURN
283 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
284 #undef TARGET_PROMOTE_PROTOTYPES
285 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
286 #undef TARGET_PASS_BY_REFERENCE
287 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
288 #undef TARGET_ARG_PARTIAL_BYTES
289 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
291 #undef TARGET_SETUP_INCOMING_VARARGS
292 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
294 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
295 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
297 #undef TARGET_DEFAULT_SHORT_ENUMS
298 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
300 #undef TARGET_ALIGN_ANON_BITFIELD
301 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
303 #undef TARGET_NARROW_VOLATILE_BITFIELD
304 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
306 #undef TARGET_CXX_GUARD_TYPE
307 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
309 #undef TARGET_CXX_GUARD_MASK_BIT
310 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
312 #undef TARGET_CXX_GET_COOKIE_SIZE
313 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
315 #undef TARGET_CXX_COOKIE_HAS_SIZE
316 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
318 #undef TARGET_CXX_CDTOR_RETURNS_THIS
319 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
321 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
322 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
324 #undef TARGET_CXX_USE_AEABI_ATEXIT
325 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
327 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
328 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
329 arm_cxx_determine_class_data_visibility
331 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
332 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
334 #undef TARGET_RETURN_IN_MSB
335 #define TARGET_RETURN_IN_MSB arm_return_in_msb
337 #undef TARGET_RETURN_IN_MEMORY
338 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
340 #undef TARGET_MUST_PASS_IN_STACK
341 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
343 #ifdef TARGET_UNWIND_INFO
344 #undef TARGET_UNWIND_EMIT
345 #define TARGET_UNWIND_EMIT arm_unwind_emit
347 /* EABI unwinding tables use a different format for the typeinfo tables. */
348 #undef TARGET_ASM_TTYPE
349 #define TARGET_ASM_TTYPE arm_output_ttype
351 #undef TARGET_ARM_EABI_UNWINDER
352 #define TARGET_ARM_EABI_UNWINDER true
353 #endif /* TARGET_UNWIND_INFO */
355 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
356 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
358 #undef TARGET_CANNOT_COPY_INSN_P
359 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
362 #undef TARGET_HAVE_TLS
363 #define TARGET_HAVE_TLS true
366 #undef TARGET_CANNOT_FORCE_CONST_MEM
367 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
369 #undef TARGET_SCHED_ISSUE_RATE
370 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
372 #undef TARGET_MANGLE_TYPE
373 #define TARGET_MANGLE_TYPE arm_mangle_type
376 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
377 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
380 struct gcc_target targetm
= TARGET_INITIALIZER
;
382 /* Obstack for minipool constant handling. */
383 static struct obstack minipool_obstack
;
/* First object allocated on minipool_obstack; used as the free-back mark.  */
384 static char * minipool_startobj
;
386 /* The maximum number of insns skipped which
387 will be conditionalised if possible. */
388 static int max_insns_skipped
= 5;
/* The assembler output stream, owned by the rest of the compiler.  */
390 extern FILE * asm_out_file
;
392 /* True if we are currently building a constant table. */
393 int making_const_table
;
395 /* Define the information needed to generate branch insns. This is
396 stored from the compare operation. */
397 rtx arm_compare_op0
, arm_compare_op1
;
399 /* The processor for which instructions should be scheduled. */
400 enum processor_type arm_tune
= arm_none
;
402 /* The default processor used if not overridden by commandline. */
403 static enum processor_type arm_default_cpu
= arm_none
;
405 /* Which floating point model to use. */
406 enum arm_fp_model arm_fp_model
;
408 /* Which floating point hardware is available. */
409 enum fputype arm_fpu_arch
;
411 /* Which floating point hardware to schedule for. */
412 enum fputype arm_fpu_tune
;
414 /* Whether to use floating point hardware. */
415 enum float_abi_type arm_float_abi
;
417 /* Which ABI to use. */
418 enum arm_abi_type arm_abi
;
420 /* Which thread pointer model to use. */
421 enum arm_tp_type target_thread_pointer
= TP_AUTO
;
423 /* Used to parse -mstructure_size_boundary command line option. */
424 int arm_structure_size_boundary
= DEFAULT_STRUCTURE_SIZE_BOUNDARY
;
426 /* Used for Thumb call_via trampolines. */
427 rtx thumb_call_via_label
[14];
/* Nonzero once any call_via label has been used, so the trampolines
   must actually be emitted.  */
428 static int thumb_call_reg_needed
;
/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply.  */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)        /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)        /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)       /* XScale.  */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
                                         media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
                                         Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)       /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)       /* Thumb-2.  */
#define FL_NOTM       (1 << 17)       /* Instructions not present in the 'M'
                                         profile.  */
#define FL_DIV        (1 << 18)       /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)       /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)       /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

/* Capability sets implied by each architecture level.  Each level is
   built from its predecessor, so a later arch implies every earlier
   feature except where explicitly masked out (the M-profile archs
   clear FL_NOTM).  */
#define FL_FOR_ARCH2	FL_NOTM
#define FL_FOR_ARCH3	(FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	(FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2	(FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH6M	(FL_FOR_ARCH6 & ~FL_NOTM)
#define FL_FOR_ARCH7	(FL_FOR_ARCH6T2 & ~FL_NOTM)
#define FL_FOR_ARCH7A	(FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R	(FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M	(FL_FOR_ARCH7 | FL_DIV)
480 /* The bits in this mask specify which
481 instructions we are allowed to generate. */
482 static unsigned long insn_flags
= 0;
484 /* The bits in this mask specify which instruction scheduling options should
486 static unsigned long tune_flags
= 0;
488 /* The following are used in the arm.md file as equivalents to bits
489 in the above two flag variables. */
491 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
494 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
497 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
500 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
503 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
506 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
509 /* Nonzero if this chip supports the ARM 6K extensions. */
512 /* Nonzero if instructions not present in the 'M' profile can be used. */
513 int arm_arch_notm
= 0;
515 /* Nonzero if this chip can benefit from load scheduling. */
516 int arm_ld_sched
= 0;
518 /* Nonzero if this chip is a StrongARM. */
519 int arm_tune_strongarm
= 0;
521 /* Nonzero if this chip is a Cirrus variant. */
522 int arm_arch_cirrus
= 0;
524 /* Nonzero if this chip supports Intel Wireless MMX technology. */
525 int arm_arch_iwmmxt
= 0;
527 /* Nonzero if this chip is an XScale. */
528 int arm_arch_xscale
= 0;
530 /* Nonzero if tuning for XScale */
531 int arm_tune_xscale
= 0;
533 /* Nonzero if we want to tune for stores that access the write-buffer.
534 This typically means an ARM6 or ARM7 with MMU or MPU. */
535 int arm_tune_wbuf
= 0;
537 /* Nonzero if generating Thumb instructions. */
540 /* Nonzero if we should define __THUMB_INTERWORK__ in the
542 XXX This is a bit of a hack, it's intended to help work around
543 problems in GLD which doesn't understand that armv5t code is
544 interworking clean. */
545 int arm_cpp_interwork
= 0;
547 /* Nonzero if chip supports Thumb 2. */
550 /* Nonzero if chip supports integer division instruction. */
553 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
554 must report the mode of the memory reference from PRINT_OPERAND to
555 PRINT_OPERAND_ADDRESS. */
556 enum machine_mode output_memory_reference_mode
;
558 /* The register number to be used for the PIC offset register. */
559 unsigned arm_pic_register
= INVALID_REGNUM
;
561 /* Set to 1 when a return insn is output, this means that the epilogue
563 int return_used_this_function
;
565 /* Set to 1 after arm_reorg has started. Reset to start at the start of
566 the next function. */
567 static int after_arm_reorg
= 0;
569 /* The maximum number of insns to be used when loading a constant. */
570 static int arm_constant_limit
= 3;
572 /* For an explanation of these variables, see final_prescan_insn below. */
574 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
575 enum arm_cond_code arm_current_cc
;
577 int arm_target_label
;
578 /* The number of conditionally executed insns, including the current insn. */
579 int arm_condexec_count
= 0;
580 /* A bitmask specifying the patterns for the IT block.
581 Zero means do not output an IT block before this insn. */
582 int arm_condexec_mask
= 0;
583 /* The number of bits used in arm_condexec_mask. */
584 int arm_condexec_masklen
= 0;
586 /* The condition codes of the ARM, and the inverse function. */
587 static const char * const arm_condition_codes
[] =
589 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
590 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
593 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
594 #define streq(string1, string2) (strcmp (string1, string2) == 0)
596 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
597 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
598 | (1 << PIC_OFFSET_TABLE_REGNUM)))
600 /* Initialization code. */
604 const char *const name
;
605 enum processor_type core
;
607 const unsigned long flags
;
608 bool (* rtx_costs
) (rtx
, int, int, int *);
611 /* Not all of these give usefully different compilation alternatives,
612 but there is no simple way of generalizing them. */
613 static const struct processors all_cores
[] =
616 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
617 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
618 #include "arm-cores.def"
620 {NULL
, arm_none
, NULL
, 0, NULL
}
623 static const struct processors all_architectures
[] =
625 /* ARM Architectures */
626 /* We don't specify rtx_costs here as it will be figured out
629 {"armv2", arm2
, "2", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH2
, NULL
},
630 {"armv2a", arm2
, "2", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH2
, NULL
},
631 {"armv3", arm6
, "3", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH3
, NULL
},
632 {"armv3m", arm7m
, "3M", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH3M
, NULL
},
633 {"armv4", arm7tdmi
, "4", FL_CO_PROC
| FL_MODE26
| FL_FOR_ARCH4
, NULL
},
634 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
635 implementations that support it, so we will leave it out for now. */
636 {"armv4t", arm7tdmi
, "4T", FL_CO_PROC
| FL_FOR_ARCH4T
, NULL
},
637 {"armv5", arm10tdmi
, "5", FL_CO_PROC
| FL_FOR_ARCH5
, NULL
},
638 {"armv5t", arm10tdmi
, "5T", FL_CO_PROC
| FL_FOR_ARCH5T
, NULL
},
639 {"armv5e", arm1026ejs
, "5E", FL_CO_PROC
| FL_FOR_ARCH5E
, NULL
},
640 {"armv5te", arm1026ejs
, "5TE", FL_CO_PROC
| FL_FOR_ARCH5TE
, NULL
},
641 {"armv6", arm1136js
, "6", FL_CO_PROC
| FL_FOR_ARCH6
, NULL
},
642 {"armv6j", arm1136js
, "6J", FL_CO_PROC
| FL_FOR_ARCH6J
, NULL
},
643 {"armv6k", mpcore
, "6K", FL_CO_PROC
| FL_FOR_ARCH6K
, NULL
},
644 {"armv6z", arm1176jzs
, "6Z", FL_CO_PROC
| FL_FOR_ARCH6Z
, NULL
},
645 {"armv6zk", arm1176jzs
, "6ZK", FL_CO_PROC
| FL_FOR_ARCH6ZK
, NULL
},
646 {"armv6t2", arm1156t2s
, "6T2", FL_CO_PROC
| FL_FOR_ARCH6T2
, NULL
},
647 {"armv6-m", cortexm1
, "6M", FL_FOR_ARCH6M
, NULL
},
648 {"armv7", cortexa8
, "7", FL_CO_PROC
| FL_FOR_ARCH7
, NULL
},
649 {"armv7-a", cortexa8
, "7A", FL_CO_PROC
| FL_FOR_ARCH7A
, NULL
},
650 {"armv7-r", cortexr4
, "7R", FL_CO_PROC
| FL_FOR_ARCH7R
, NULL
},
651 {"armv7-m", cortexm3
, "7M", FL_CO_PROC
| FL_FOR_ARCH7M
, NULL
},
652 {"ep9312", ep9312
, "4T", FL_LDSCHED
| FL_CIRRUS
| FL_FOR_ARCH4
, NULL
},
653 {"iwmmxt", iwmmxt
, "5TE", FL_LDSCHED
| FL_STRONG
| FL_FOR_ARCH5TE
| FL_XSCALE
| FL_IWMMXT
, NULL
},
654 {NULL
, arm_none
, NULL
, 0 , NULL
}
657 struct arm_cpu_select
661 const struct processors
* processors
;
664 /* This is a magic structure. The 'string' field is magically filled in
665 with a pointer to the value specified by the user on the command line
666 assuming that the user has specified such a value. */
668 static struct arm_cpu_select arm_select
[] =
670 /* string name processors */
671 { NULL
, "-mcpu=", all_cores
},
672 { NULL
, "-march=", all_architectures
},
673 { NULL
, "-mtune=", all_cores
}
676 /* Defines representing the indexes into the above table. */
677 #define ARM_OPT_SET_CPU 0
678 #define ARM_OPT_SET_ARCH 1
679 #define ARM_OPT_SET_TUNE 2
681 /* The name of the preprocessor macro to define for this architecture. */
683 char arm_arch_name
[] = "__ARM_ARCH_0UNK__";
692 /* Available values for -mfpu=. */
694 static const struct fpu_desc all_fpus
[] =
696 {"fpa", FPUTYPE_FPA
},
697 {"fpe2", FPUTYPE_FPA_EMU2
},
698 {"fpe3", FPUTYPE_FPA_EMU2
},
699 {"maverick", FPUTYPE_MAVERICK
},
700 {"vfp", FPUTYPE_VFP
},
701 {"vfp3", FPUTYPE_VFP3
},
702 {"neon", FPUTYPE_NEON
}
706 /* Floating point models used by the different hardware.
707 See fputype in arm.h. */
709 static const enum fputype fp_model_for_fpu
[] =
711 /* No FP hardware. */
712 ARM_FP_MODEL_UNKNOWN
, /* FPUTYPE_NONE */
713 ARM_FP_MODEL_FPA
, /* FPUTYPE_FPA */
714 ARM_FP_MODEL_FPA
, /* FPUTYPE_FPA_EMU2 */
715 ARM_FP_MODEL_FPA
, /* FPUTYPE_FPA_EMU3 */
716 ARM_FP_MODEL_MAVERICK
, /* FPUTYPE_MAVERICK */
717 ARM_FP_MODEL_VFP
, /* FPUTYPE_VFP */
718 ARM_FP_MODEL_VFP
, /* FPUTYPE_VFP3 */
719 ARM_FP_MODEL_VFP
/* FPUTYPE_NEON */
726 enum float_abi_type abi_type
;
730 /* Available values for -mfloat-abi=. */
732 static const struct float_abi all_float_abis
[] =
734 {"soft", ARM_FLOAT_ABI_SOFT
},
735 {"softfp", ARM_FLOAT_ABI_SOFTFP
},
736 {"hard", ARM_FLOAT_ABI_HARD
}
743 enum arm_abi_type abi_type
;
747 /* Available values for -mabi=. */
749 static const struct abi_name arm_all_abis
[] =
751 {"apcs-gnu", ARM_ABI_APCS
},
752 {"atpcs", ARM_ABI_ATPCS
},
753 {"aapcs", ARM_ABI_AAPCS
},
754 {"iwmmxt", ARM_ABI_IWMMXT
},
755 {"aapcs-linux", ARM_ABI_AAPCS_LINUX
}
758 /* Supported TLS relocations. */
768 /* Emit an insn that's a simple single-set. Both the operands must be known
771 emit_set_insn (rtx x
, rtx y
)
773 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
/* Return the number of bits set in VALUE.

   Uses Kernighan's trick: each iteration of the loop clears the
   least-significant set bit, so the loop runs once per set bit rather
   than once per bit position.  (Reconstructed: the garbled text dropped
   the loop header, braces and return; the visible fragments --
   `count = 0` and `value &= value - 1` -- pin down this algorithm.)  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
791 /* Set up library functions unique to ARM. */
794 arm_init_libfuncs (void)
796 /* There are no special library functions unless we are using the
801 /* The functions below are described in Section 4 of the "Run-Time
802 ABI for the ARM architecture", Version 1.0. */
804 /* Double-precision floating-point arithmetic. Table 2. */
805 set_optab_libfunc (add_optab
, DFmode
, "__aeabi_dadd");
806 set_optab_libfunc (sdiv_optab
, DFmode
, "__aeabi_ddiv");
807 set_optab_libfunc (smul_optab
, DFmode
, "__aeabi_dmul");
808 set_optab_libfunc (neg_optab
, DFmode
, "__aeabi_dneg");
809 set_optab_libfunc (sub_optab
, DFmode
, "__aeabi_dsub");
811 /* Double-precision comparisons. Table 3. */
812 set_optab_libfunc (eq_optab
, DFmode
, "__aeabi_dcmpeq");
813 set_optab_libfunc (ne_optab
, DFmode
, NULL
);
814 set_optab_libfunc (lt_optab
, DFmode
, "__aeabi_dcmplt");
815 set_optab_libfunc (le_optab
, DFmode
, "__aeabi_dcmple");
816 set_optab_libfunc (ge_optab
, DFmode
, "__aeabi_dcmpge");
817 set_optab_libfunc (gt_optab
, DFmode
, "__aeabi_dcmpgt");
818 set_optab_libfunc (unord_optab
, DFmode
, "__aeabi_dcmpun");
820 /* Single-precision floating-point arithmetic. Table 4. */
821 set_optab_libfunc (add_optab
, SFmode
, "__aeabi_fadd");
822 set_optab_libfunc (sdiv_optab
, SFmode
, "__aeabi_fdiv");
823 set_optab_libfunc (smul_optab
, SFmode
, "__aeabi_fmul");
824 set_optab_libfunc (neg_optab
, SFmode
, "__aeabi_fneg");
825 set_optab_libfunc (sub_optab
, SFmode
, "__aeabi_fsub");
827 /* Single-precision comparisons. Table 5. */
828 set_optab_libfunc (eq_optab
, SFmode
, "__aeabi_fcmpeq");
829 set_optab_libfunc (ne_optab
, SFmode
, NULL
);
830 set_optab_libfunc (lt_optab
, SFmode
, "__aeabi_fcmplt");
831 set_optab_libfunc (le_optab
, SFmode
, "__aeabi_fcmple");
832 set_optab_libfunc (ge_optab
, SFmode
, "__aeabi_fcmpge");
833 set_optab_libfunc (gt_optab
, SFmode
, "__aeabi_fcmpgt");
834 set_optab_libfunc (unord_optab
, SFmode
, "__aeabi_fcmpun");
836 /* Floating-point to integer conversions. Table 6. */
837 set_conv_libfunc (sfix_optab
, SImode
, DFmode
, "__aeabi_d2iz");
838 set_conv_libfunc (ufix_optab
, SImode
, DFmode
, "__aeabi_d2uiz");
839 set_conv_libfunc (sfix_optab
, DImode
, DFmode
, "__aeabi_d2lz");
840 set_conv_libfunc (ufix_optab
, DImode
, DFmode
, "__aeabi_d2ulz");
841 set_conv_libfunc (sfix_optab
, SImode
, SFmode
, "__aeabi_f2iz");
842 set_conv_libfunc (ufix_optab
, SImode
, SFmode
, "__aeabi_f2uiz");
843 set_conv_libfunc (sfix_optab
, DImode
, SFmode
, "__aeabi_f2lz");
844 set_conv_libfunc (ufix_optab
, DImode
, SFmode
, "__aeabi_f2ulz");
846 /* Conversions between floating types. Table 7. */
847 set_conv_libfunc (trunc_optab
, SFmode
, DFmode
, "__aeabi_d2f");
848 set_conv_libfunc (sext_optab
, DFmode
, SFmode
, "__aeabi_f2d");
850 /* Integer to floating-point conversions. Table 8. */
851 set_conv_libfunc (sfloat_optab
, DFmode
, SImode
, "__aeabi_i2d");
852 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__aeabi_ui2d");
853 set_conv_libfunc (sfloat_optab
, DFmode
, DImode
, "__aeabi_l2d");
854 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__aeabi_ul2d");
855 set_conv_libfunc (sfloat_optab
, SFmode
, SImode
, "__aeabi_i2f");
856 set_conv_libfunc (ufloat_optab
, SFmode
, SImode
, "__aeabi_ui2f");
857 set_conv_libfunc (sfloat_optab
, SFmode
, DImode
, "__aeabi_l2f");
858 set_conv_libfunc (ufloat_optab
, SFmode
, DImode
, "__aeabi_ul2f");
860 /* Long long. Table 9. */
861 set_optab_libfunc (smul_optab
, DImode
, "__aeabi_lmul");
862 set_optab_libfunc (sdivmod_optab
, DImode
, "__aeabi_ldivmod");
863 set_optab_libfunc (udivmod_optab
, DImode
, "__aeabi_uldivmod");
864 set_optab_libfunc (ashl_optab
, DImode
, "__aeabi_llsl");
865 set_optab_libfunc (lshr_optab
, DImode
, "__aeabi_llsr");
866 set_optab_libfunc (ashr_optab
, DImode
, "__aeabi_lasr");
867 set_optab_libfunc (cmp_optab
, DImode
, "__aeabi_lcmp");
868 set_optab_libfunc (ucmp_optab
, DImode
, "__aeabi_ulcmp");
870 /* Integer (32/32->32) division. \S 4.3.1. */
871 set_optab_libfunc (sdivmod_optab
, SImode
, "__aeabi_idivmod");
872 set_optab_libfunc (udivmod_optab
, SImode
, "__aeabi_uidivmod");
874 /* The divmod functions are designed so that they can be used for
875 plain division, even though they return both the quotient and the
876 remainder. The quotient is returned in the usual location (i.e.,
877 r0 for SImode, {r0, r1} for DImode), just as would be expected
878 for an ordinary division routine. Because the AAPCS calling
879 conventions specify that all of { r0, r1, r2, r3 } are
880 callee-saved registers, there is no need to tell the compiler
881 explicitly that those registers are clobbered by these
883 set_optab_libfunc (sdiv_optab
, DImode
, "__aeabi_ldivmod");
884 set_optab_libfunc (udiv_optab
, DImode
, "__aeabi_uldivmod");
886 /* For SImode division the ABI provides div-without-mod routines,
888 set_optab_libfunc (sdiv_optab
, SImode
, "__aeabi_idiv");
889 set_optab_libfunc (udiv_optab
, SImode
, "__aeabi_uidiv");
891 /* We don't have mod libcalls. Fortunately gcc knows how to use the
892 divmod libcalls instead. */
893 set_optab_libfunc (smod_optab
, DImode
, NULL
);
894 set_optab_libfunc (umod_optab
, DImode
, NULL
);
895 set_optab_libfunc (smod_optab
, SImode
, NULL
);
896 set_optab_libfunc (umod_optab
, SImode
, NULL
);
899 /* Implement TARGET_HANDLE_OPTION. */
902 arm_handle_option (size_t code
, const char *arg
, int value ATTRIBUTE_UNUSED
)
907 arm_select
[1].string
= arg
;
911 arm_select
[0].string
= arg
;
914 case OPT_mhard_float
:
915 target_float_abi_name
= "hard";
918 case OPT_msoft_float
:
919 target_float_abi_name
= "soft";
923 arm_select
[2].string
= arg
;
932 arm_target_help (void)
935 static int columns
= 0;
938 /* If we have not done so already, obtain the desired maximum width of
939 the output. Note - this is a duplication of the code at the start of
940 gcc/opts.c:print_specific_help() - the two copies should probably be
941 replaced by a single function. */
946 GET_ENVIRONMENT (p
, "COLUMNS");
949 int value
= atoi (p
);
956 /* Use a reasonable default. */
960 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
962 /* The - 2 is because we know that the last entry in the array is NULL. */
963 i
= ARRAY_SIZE (all_cores
) - 2;
965 printf (" %s", all_cores
[i
].name
);
966 remaining
= columns
- (strlen (all_cores
[i
].name
) + 4);
967 gcc_assert (remaining
>= 0);
971 int len
= strlen (all_cores
[i
].name
);
973 if (remaining
> len
+ 2)
975 printf (", %s", all_cores
[i
].name
);
976 remaining
-= len
+ 2;
982 printf ("\n %s", all_cores
[i
].name
);
983 remaining
= columns
- (len
+ 4);
987 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
989 i
= ARRAY_SIZE (all_architectures
) - 2;
992 printf (" %s", all_architectures
[i
].name
);
993 remaining
= columns
- (strlen (all_architectures
[i
].name
) + 4);
994 gcc_assert (remaining
>= 0);
998 int len
= strlen (all_architectures
[i
].name
);
1000 if (remaining
> len
+ 2)
1002 printf (", %s", all_architectures
[i
].name
);
1003 remaining
-= len
+ 2;
1009 printf ("\n %s", all_architectures
[i
].name
);
1010 remaining
= columns
- (len
+ 4);
1017 /* Fix up any incompatible options that the user has specified.
1018 This has now turned into a maze. */
1020 arm_override_options (void)
/* NOTE(review): this chunk is line-fragmented by extraction; the leading
   numeric tags are original file line numbers, not code, and several
   structural lines (braces, declarations) were dropped.  Code bytes are
   left untouched below; only comments have been added.  */
1023 enum processor_type target_arch_cpu
= arm_none
;
1025 /* Set up the flags based on the cpu/architecture selected by the user. */
1026 for (i
= ARRAY_SIZE (arm_select
); i
--;)
1028 struct arm_cpu_select
* ptr
= arm_select
+ i
;
1030 if (ptr
->string
!= NULL
&& ptr
->string
[0] != '\0')
1032 const struct processors
* sel
;
1034 for (sel
= ptr
->processors
; sel
->name
!= NULL
; sel
++)
1035 if (streq (ptr
->string
, sel
->name
))
1037 /* Set the architecture define. */
1038 if (i
!= ARM_OPT_SET_TUNE
)
1039 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", sel
->arch
);
1041 /* Determine the processor core for which we should
1042 tune code-generation. */
1043 if (/* -mcpu= is a sensible default. */
1044 i
== ARM_OPT_SET_CPU
1045 /* -mtune= overrides -mcpu= and -march=. */
1046 || i
== ARM_OPT_SET_TUNE
)
1047 arm_tune
= (enum processor_type
) (sel
- ptr
->processors
);
1049 /* Remember the CPU associated with this architecture.
1050 If no other option is used to set the CPU type,
1051 we'll use this to guess the most suitable tuning
1053 if (i
== ARM_OPT_SET_ARCH
)
1054 target_arch_cpu
= sel
->core
;
1056 if (i
!= ARM_OPT_SET_TUNE
)
1058 /* If we have been given an architecture and a processor
1059 make sure that they are compatible. We only generate
1060 a warning though, and we prefer the CPU over the
1062 if (insn_flags
!= 0 && (insn_flags
^ sel
->flags
))
1063 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1066 insn_flags
= sel
->flags
;
1072 if (sel
->name
== NULL
)
1073 error ("bad value (%s) for %s switch", ptr
->string
, ptr
->name
);
1077 /* Guess the tuning options from the architecture if necessary. */
1078 if (arm_tune
== arm_none
)
1079 arm_tune
= target_arch_cpu
;
1081 /* If the user did not specify a processor, choose one for them. */
1082 if (insn_flags
== 0)
1084 const struct processors
* sel
;
1085 unsigned int sought
;
1086 enum processor_type cpu
;
1088 cpu
= TARGET_CPU_DEFAULT
;
1089 if (cpu
== arm_none
)
1091 #ifdef SUBTARGET_CPU_DEFAULT
1092 /* Use the subtarget default CPU if none was specified by
1094 cpu
= SUBTARGET_CPU_DEFAULT
;
1096 /* Default to ARM6. */
1097 if (cpu
== arm_none
)
1100 sel
= &all_cores
[cpu
];
1102 insn_flags
= sel
->flags
;
1104 /* Now check to see if the user has specified some command line
1105 switch that require certain abilities from the cpu. */
1108 if (TARGET_INTERWORK
|| TARGET_THUMB
)
1110 sought
|= (FL_THUMB
| FL_MODE32
);
1112 /* There are no ARM processors that support both APCS-26 and
1113 interworking. Therefore we force FL_MODE26 to be removed
1114 from insn_flags here (if it was set), so that the search
1115 below will always be able to find a compatible processor. */
1116 insn_flags
&= ~FL_MODE26
;
1119 if (sought
!= 0 && ((sought
& insn_flags
) != sought
))
1121 /* Try to locate a CPU type that supports all of the abilities
1122 of the default CPU, plus the extra abilities requested by
1124 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1125 if ((sel
->flags
& sought
) == (sought
| insn_flags
))
1128 if (sel
->name
== NULL
)
1130 unsigned current_bit_count
= 0;
1131 const struct processors
* best_fit
= NULL
;
1133 /* Ideally we would like to issue an error message here
1134 saying that it was not possible to find a CPU compatible
1135 with the default CPU, but which also supports the command
1136 line options specified by the programmer, and so they
1137 ought to use the -mcpu=<name> command line option to
1138 override the default CPU type.
1140 If we cannot find a cpu that has both the
1141 characteristics of the default cpu and the given
1142 command line options we scan the array again looking
1143 for a best match. */
1144 for (sel
= all_cores
; sel
->name
!= NULL
; sel
++)
1145 if ((sel
->flags
& sought
) == sought
)
1149 count
= bit_count (sel
->flags
& insn_flags
);
1151 if (count
>= current_bit_count
)
1154 current_bit_count
= count
;
1158 gcc_assert (best_fit
);
1162 insn_flags
= sel
->flags
;
1164 sprintf (arm_arch_name
, "__ARM_ARCH_%s__", sel
->arch
);
1165 arm_default_cpu
= (enum processor_type
) (sel
- all_cores
);
1166 if (arm_tune
== arm_none
)
1167 arm_tune
= arm_default_cpu
;
1170 /* The processor for which we should tune should now have been
1172 gcc_assert (arm_tune
!= arm_none
);
1174 tune_flags
= all_cores
[(int)arm_tune
].flags
;
1176 targetm
.rtx_costs
= arm_size_rtx_costs
;
1178 targetm
.rtx_costs
= all_cores
[(int)arm_tune
].rtx_costs
;
/* Sanity checks: reject or adjust option combinations the backend
   cannot support.  */
1180 /* Make sure that the processor choice does not conflict with any of the
1181 other command line choices. */
1182 if (TARGET_ARM
&& !(insn_flags
& FL_NOTM
))
1183 error ("target CPU does not support ARM mode");
1185 if (TARGET_INTERWORK
&& !(insn_flags
& FL_THUMB
))
1187 warning (0, "target CPU does not support interworking" );
1188 target_flags
&= ~MASK_INTERWORK
;
1191 if (TARGET_THUMB
&& !(insn_flags
& FL_THUMB
))
1193 warning (0, "target CPU does not support THUMB instructions");
1194 target_flags
&= ~MASK_THUMB
;
1197 if (TARGET_APCS_FRAME
&& TARGET_THUMB
)
1199 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1200 target_flags
&= ~MASK_APCS_FRAME
;
1203 /* Callee super interworking implies thumb interworking. Adding
1204 this to the flags here simplifies the logic elsewhere. */
1205 if (TARGET_THUMB
&& TARGET_CALLEE_INTERWORKING
)
1206 target_flags
|= MASK_INTERWORK
;
1208 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1209 from here where no function is being compiled currently. */
1210 if ((TARGET_TPCS_FRAME
|| TARGET_TPCS_LEAF_FRAME
) && TARGET_ARM
)
1211 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1213 if (TARGET_ARM
&& TARGET_CALLEE_INTERWORKING
)
1214 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1216 if (TARGET_ARM
&& TARGET_CALLER_INTERWORKING
)
1217 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1219 if (TARGET_APCS_STACK
&& !TARGET_APCS_FRAME
)
1221 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1222 target_flags
|= MASK_APCS_FRAME
;
1225 if (TARGET_POKE_FUNCTION_NAME
)
1226 target_flags
|= MASK_APCS_FRAME
;
1228 if (TARGET_APCS_REENT
&& flag_pic
)
1229 error ("-fpic and -mapcs-reent are incompatible");
1231 if (TARGET_APCS_REENT
)
1232 warning (0, "APCS reentrant code not supported. Ignored");
1234 /* If this target is normally configured to use APCS frames, warn if they
1235 are turned off and debugging is turned on. */
1237 && write_symbols
!= NO_DEBUG
1238 && !TARGET_APCS_FRAME
1239 && (TARGET_DEFAULT
& MASK_APCS_FRAME
))
1240 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1242 if (TARGET_APCS_FLOAT
)
1243 warning (0, "passing floating point arguments in fp regs not yet supported");
1245 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1246 arm_arch3m
= (insn_flags
& FL_ARCH3M
) != 0;
1247 arm_arch4
= (insn_flags
& FL_ARCH4
) != 0;
1248 arm_arch4t
= arm_arch4
& ((insn_flags
& FL_THUMB
) != 0);
1249 arm_arch5
= (insn_flags
& FL_ARCH5
) != 0;
1250 arm_arch5e
= (insn_flags
& FL_ARCH5E
) != 0;
1251 arm_arch6
= (insn_flags
& FL_ARCH6
) != 0;
1252 arm_arch6k
= (insn_flags
& FL_ARCH6K
) != 0;
1253 arm_arch_notm
= (insn_flags
& FL_NOTM
) != 0;
1254 arm_arch_thumb2
= (insn_flags
& FL_THUMB2
) != 0;
1255 arm_arch_xscale
= (insn_flags
& FL_XSCALE
) != 0;
1256 arm_arch_cirrus
= (insn_flags
& FL_CIRRUS
) != 0;
1258 arm_ld_sched
= (tune_flags
& FL_LDSCHED
) != 0;
1259 arm_tune_strongarm
= (tune_flags
& FL_STRONG
) != 0;
1260 thumb_code
= (TARGET_ARM
== 0);
1261 arm_tune_wbuf
= (tune_flags
& FL_WBUF
) != 0;
1262 arm_tune_xscale
= (tune_flags
& FL_XSCALE
) != 0;
1263 arm_arch_iwmmxt
= (insn_flags
& FL_IWMMXT
) != 0;
1264 arm_arch_hwdiv
= (insn_flags
& FL_DIV
) != 0;
1266 /* V5 code we generate is completely interworking capable, so we turn off
1267 TARGET_INTERWORK here to avoid many tests later on. */
1269 /* XXX However, we must pass the right pre-processor defines to CPP
1270 or GLD can get confused. This is a hack. */
1271 if (TARGET_INTERWORK
)
1272 arm_cpp_interwork
= 1;
1275 target_flags
&= ~MASK_INTERWORK
;
1277 if (target_abi_name
)
1279 for (i
= 0; i
< ARRAY_SIZE (arm_all_abis
); i
++)
1281 if (streq (arm_all_abis
[i
].name
, target_abi_name
))
1283 arm_abi
= arm_all_abis
[i
].abi_type
;
1287 if (i
== ARRAY_SIZE (arm_all_abis
))
1288 error ("invalid ABI option: -mabi=%s", target_abi_name
);
1291 arm_abi
= ARM_DEFAULT_ABI
;
1293 if (TARGET_IWMMXT
&& !ARM_DOUBLEWORD_ALIGN
)
1294 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1296 if (TARGET_IWMMXT_ABI
&& !TARGET_IWMMXT
)
1297 error ("iwmmxt abi requires an iwmmxt capable cpu");
/* Floating-point: translate -mfpe= to an -mfpu name, then resolve
   -mfpu= and -mfloat-abi= against the option tables.  */
1299 arm_fp_model
= ARM_FP_MODEL_UNKNOWN
;
1300 if (target_fpu_name
== NULL
&& target_fpe_name
!= NULL
)
1302 if (streq (target_fpe_name
, "2"))
1303 target_fpu_name
= "fpe2";
1304 else if (streq (target_fpe_name
, "3"))
1305 target_fpu_name
= "fpe3";
1307 error ("invalid floating point emulation option: -mfpe=%s",
1310 if (target_fpu_name
!= NULL
)
1312 /* The user specified a FPU. */
1313 for (i
= 0; i
< ARRAY_SIZE (all_fpus
); i
++)
1315 if (streq (all_fpus
[i
].name
, target_fpu_name
))
1317 arm_fpu_arch
= all_fpus
[i
].fpu
;
1318 arm_fpu_tune
= arm_fpu_arch
;
1319 arm_fp_model
= fp_model_for_fpu
[arm_fpu_arch
];
1323 if (arm_fp_model
== ARM_FP_MODEL_UNKNOWN
)
1324 error ("invalid floating point option: -mfpu=%s", target_fpu_name
);
1328 #ifdef FPUTYPE_DEFAULT
1329 /* Use the default if it is specified for this platform. */
1330 arm_fpu_arch
= FPUTYPE_DEFAULT
;
1331 arm_fpu_tune
= FPUTYPE_DEFAULT
;
1333 /* Pick one based on CPU type. */
1334 /* ??? Some targets assume FPA is the default.
1335 if ((insn_flags & FL_VFP) != 0)
1336 arm_fpu_arch = FPUTYPE_VFP;
1339 if (arm_arch_cirrus
)
1340 arm_fpu_arch
= FPUTYPE_MAVERICK
;
1342 arm_fpu_arch
= FPUTYPE_FPA_EMU2
;
1344 if (tune_flags
& FL_CO_PROC
&& arm_fpu_arch
== FPUTYPE_FPA_EMU2
)
1345 arm_fpu_tune
= FPUTYPE_FPA
;
1347 arm_fpu_tune
= arm_fpu_arch
;
1348 arm_fp_model
= fp_model_for_fpu
[arm_fpu_arch
];
1349 gcc_assert (arm_fp_model
!= ARM_FP_MODEL_UNKNOWN
);
1352 if (target_float_abi_name
!= NULL
)
1354 /* The user specified a FP ABI. */
1355 for (i
= 0; i
< ARRAY_SIZE (all_float_abis
); i
++)
1357 if (streq (all_float_abis
[i
].name
, target_float_abi_name
))
1359 arm_float_abi
= all_float_abis
[i
].abi_type
;
1363 if (i
== ARRAY_SIZE (all_float_abis
))
1364 error ("invalid floating point abi: -mfloat-abi=%s",
1365 target_float_abi_name
);
1368 arm_float_abi
= TARGET_DEFAULT_FLOAT_ABI
;
1370 if (arm_float_abi
== ARM_FLOAT_ABI_HARD
&& TARGET_VFP
)
1371 sorry ("-mfloat-abi=hard and VFP");
1373 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1374 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1375 will ever exist. GCC makes no attempt to support this combination. */
1376 if (TARGET_IWMMXT
&& !TARGET_SOFT_FLOAT
)
1377 sorry ("iWMMXt and hardware floating point");
1379 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1380 if (TARGET_THUMB2
&& TARGET_IWMMXT
)
1381 sorry ("Thumb-2 iWMMXt");
1383 /* If soft-float is specified then don't use FPU. */
1384 if (TARGET_SOFT_FLOAT
)
1385 arm_fpu_arch
= FPUTYPE_NONE
;
1387 /* For arm2/3 there is no need to do any scheduling if there is only
1388 a floating point emulator, or we are doing software floating-point. */
1389 if ((TARGET_SOFT_FLOAT
1390 || arm_fpu_tune
== FPUTYPE_FPA_EMU2
1391 || arm_fpu_tune
== FPUTYPE_FPA_EMU3
)
1392 && (tune_flags
& FL_MODE32
) == 0)
1393 flag_schedule_insns
= flag_schedule_insns_after_reload
= 0;
/* Select how the thread pointer is accessed (-mtp=soft/auto/cp15).  */
1395 if (target_thread_switch
)
1397 if (strcmp (target_thread_switch
, "soft") == 0)
1398 target_thread_pointer
= TP_SOFT
;
1399 else if (strcmp (target_thread_switch
, "auto") == 0)
1400 target_thread_pointer
= TP_AUTO
;
1401 else if (strcmp (target_thread_switch
, "cp15") == 0)
1402 target_thread_pointer
= TP_CP15
;
1404 error ("invalid thread pointer option: -mtp=%s", target_thread_switch
);
1407 /* Use the cp15 method if it is available. */
1408 if (target_thread_pointer
== TP_AUTO
)
1410 if (arm_arch6k
&& !TARGET_THUMB
)
1411 target_thread_pointer
= TP_CP15
;
1413 target_thread_pointer
= TP_SOFT
;
1416 if (TARGET_HARD_TP
&& TARGET_THUMB1
)
1417 error ("can not use -mtp=cp15 with 16-bit Thumb");
1419 /* Override the default structure alignment for AAPCS ABI. */
1420 if (TARGET_AAPCS_BASED
)
1421 arm_structure_size_boundary
= 8;
1423 if (structure_size_string
!= NULL
)
1425 int size
= strtol (structure_size_string
, NULL
, 0);
1427 if (size
== 8 || size
== 32
1428 || (ARM_DOUBLEWORD_ALIGN
&& size
== 64))
1429 arm_structure_size_boundary
= size
;
1431 warning (0, "structure size boundary can only be set to %s",
1432 ARM_DOUBLEWORD_ALIGN
? "8, 32 or 64": "8 or 32");
1435 if (!TARGET_ARM
&& TARGET_VXWORKS_RTP
&& flag_pic
)
1437 error ("RTP PIC is incompatible with Thumb");
/* PIC register selection: r9 vs r10 depending on ABI/stack checking,
   with a -mpic-register= override validated below.  */
1441 /* If stack checking is disabled, we can use r10 as the PIC register,
1442 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1443 if (flag_pic
&& TARGET_SINGLE_PIC_BASE
)
1445 if (TARGET_VXWORKS_RTP
)
1446 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1447 arm_pic_register
= (TARGET_APCS_STACK
|| TARGET_AAPCS_BASED
) ? 9 : 10;
1450 if (flag_pic
&& TARGET_VXWORKS_RTP
)
1451 arm_pic_register
= 9;
1453 if (arm_pic_register_string
!= NULL
)
1455 int pic_register
= decode_reg_name (arm_pic_register_string
);
1458 warning (0, "-mpic-register= is useless without -fpic");
1460 /* Prevent the user from choosing an obviously stupid PIC register. */
1461 else if (pic_register
< 0 || call_used_regs
[pic_register
]
1462 || pic_register
== HARD_FRAME_POINTER_REGNUM
1463 || pic_register
== STACK_POINTER_REGNUM
1464 || pic_register
>= PC_REGNUM
1465 || (TARGET_VXWORKS_RTP
1466 && (unsigned int) pic_register
!= arm_pic_register
))
1467 error ("unable to use '%s' for PIC register", arm_pic_register_string
);
1469 arm_pic_register
= pic_register
;
1472 /* ??? We might want scheduling for thumb2. */
1473 if (TARGET_THUMB
&& flag_schedule_insns
)
1475 /* Don't warn since it's on by default in -O2. */
1476 flag_schedule_insns
= 0;
/* Tuning knobs: constant-synthesis limit and the conditional-execution
   window, adjusted for the tuned core.  */
1481 arm_constant_limit
= 1;
1483 /* If optimizing for size, bump the number of instructions that we
1484 are prepared to conditionally execute (even on a StrongARM). */
1485 max_insns_skipped
= 6;
1489 /* For processors with load scheduling, it never costs more than
1490 2 cycles to load a constant, and the load scheduler may well
1491 reduce that to 1. */
1493 arm_constant_limit
= 1;
1495 /* On XScale the longer latency of a load makes it more difficult
1496 to achieve a good schedule, so it's faster to synthesize
1497 constants that can be done in two insns. */
1498 if (arm_tune_xscale
)
1499 arm_constant_limit
= 2;
1501 /* StrongARM has early execution of branches, so a sequence
1502 that is worth skipping is shorter. */
1503 if (arm_tune_strongarm
)
1504 max_insns_skipped
= 3;
1507 /* Register global variables with the garbage collector. */
1508 arm_add_gc_roots ();
1512 arm_add_gc_roots (void)
1514 gcc_obstack_init(&minipool_obstack
);
1515 minipool_startobj
= (char *) obstack_alloc (&minipool_obstack
, 0);
1518 /* A table of known ARM exception types.
1519 For use with the interrupt function attribute. */
1523 const char *const arg
;
1524 const unsigned long return_value
;
1528 static const isr_attribute_arg isr_attribute_args
[] =
1530 { "IRQ", ARM_FT_ISR
},
1531 { "irq", ARM_FT_ISR
},
1532 { "FIQ", ARM_FT_FIQ
},
1533 { "fiq", ARM_FT_FIQ
},
1534 { "ABORT", ARM_FT_ISR
},
1535 { "abort", ARM_FT_ISR
},
1536 { "ABORT", ARM_FT_ISR
},
1537 { "abort", ARM_FT_ISR
},
1538 { "UNDEF", ARM_FT_EXCEPTION
},
1539 { "undef", ARM_FT_EXCEPTION
},
1540 { "SWI", ARM_FT_EXCEPTION
},
1541 { "swi", ARM_FT_EXCEPTION
},
1542 { NULL
, ARM_FT_NORMAL
}
1545 /* Returns the (interrupt) function type of the current
1546 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1548 static unsigned long
1549 arm_isr_value (tree argument
)
1551 const isr_attribute_arg
* ptr
;
1555 return ARM_FT_NORMAL
| ARM_FT_STACKALIGN
;
1557 /* No argument - default to IRQ. */
1558 if (argument
== NULL_TREE
)
1561 /* Get the value of the argument. */
1562 if (TREE_VALUE (argument
) == NULL_TREE
1563 || TREE_CODE (TREE_VALUE (argument
)) != STRING_CST
)
1564 return ARM_FT_UNKNOWN
;
1566 arg
= TREE_STRING_POINTER (TREE_VALUE (argument
));
1568 /* Check it against the list of known arguments. */
1569 for (ptr
= isr_attribute_args
; ptr
->arg
!= NULL
; ptr
++)
1570 if (streq (arg
, ptr
->arg
))
1571 return ptr
->return_value
;
1573 /* An unrecognized interrupt type. */
1574 return ARM_FT_UNKNOWN
;
1577 /* Computes the type of the current function. */
1579 static unsigned long
1580 arm_compute_func_type (void)
1582 unsigned long type
= ARM_FT_UNKNOWN
;
1586 gcc_assert (TREE_CODE (current_function_decl
) == FUNCTION_DECL
);
1588 /* Decide if the current function is volatile. Such functions
1589 never return, and many memory cycles can be saved by not storing
1590 register values that will never be needed again. This optimization
1591 was added to speed up context switching in a kernel application. */
1593 && (TREE_NOTHROW (current_function_decl
)
1594 || !(flag_unwind_tables
1595 || (flag_exceptions
&& !USING_SJLJ_EXCEPTIONS
)))
1596 && TREE_THIS_VOLATILE (current_function_decl
))
1597 type
|= ARM_FT_VOLATILE
;
1599 if (cfun
->static_chain_decl
!= NULL
)
1600 type
|= ARM_FT_NESTED
;
1602 attr
= DECL_ATTRIBUTES (current_function_decl
);
1604 a
= lookup_attribute ("naked", attr
);
1606 type
|= ARM_FT_NAKED
;
1608 a
= lookup_attribute ("isr", attr
);
1610 a
= lookup_attribute ("interrupt", attr
);
1613 type
|= TARGET_INTERWORK
? ARM_FT_INTERWORKED
: ARM_FT_NORMAL
;
1615 type
|= arm_isr_value (TREE_VALUE (a
));
1620 /* Returns the type of the current function. */
1623 arm_current_func_type (void)
1625 if (ARM_FUNC_TYPE (cfun
->machine
->func_type
) == ARM_FT_UNKNOWN
)
1626 cfun
->machine
->func_type
= arm_compute_func_type ();
1628 return cfun
->machine
->func_type
;
1632 arm_allocate_stack_slots_for_args (void)
1634 /* Naked functions should not allocate stack slots for arguments. */
1635 return !IS_NAKED (arm_current_func_type ());
1639 /* Return 1 if it is possible to return using a single instruction.
1640 If SIBLING is non-null, this is a test for a return before a sibling
1641 call. SIBLING is the call insn, so we can examine its register usage. */
1644 use_return_insn (int iscond
, rtx sibling
)
/* NOTE(review): this chunk is line-fragmented by extraction; the leading
   numeric tags are original line numbers, and the early-exit "return 0;"
   lines were dropped.  Code bytes left untouched; comments only added.  */
1647 unsigned int func_type
;
1648 unsigned long saved_int_regs
;
1649 unsigned HOST_WIDE_INT stack_adjust
;
1650 arm_stack_offsets
*offsets
;
1652 /* Never use a return instruction before reload has run. */
1653 if (!reload_completed
)
1656 func_type
= arm_current_func_type ();
1658 /* Naked, volatile and stack alignment functions need special
1660 if (func_type
& (ARM_FT_VOLATILE
| ARM_FT_NAKED
| ARM_FT_STACKALIGN
))
1663 /* So do interrupt functions that use the frame pointer and Thumb
1664 interrupt functions. */
1665 if (IS_INTERRUPT (func_type
) && (frame_pointer_needed
|| TARGET_THUMB
))
/* stack_adjust is the size of the outgoing-arguments area that must be
   popped before the saved registers can be restored.  */
1668 offsets
= arm_get_frame_offsets ();
1669 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
1671 /* As do variadic functions. */
1672 if (crtl
->args
.pretend_args_size
1673 || cfun
->machine
->uses_anonymous_args
1674 /* Or if the function calls __builtin_eh_return () */
1675 || crtl
->calls_eh_return
1676 /* Or if the function calls alloca */
1677 || cfun
->calls_alloca
1678 /* Or if there is a stack adjustment. However, if the stack pointer
1679 is saved on the stack, we can use a pre-incrementing stack load. */
1680 || !(stack_adjust
== 0 || (TARGET_APCS_FRAME
&& frame_pointer_needed
1681 && stack_adjust
== 4)))
1684 saved_int_regs
= offsets
->saved_regs_mask
;
1686 /* Unfortunately, the insn
1688 ldmib sp, {..., sp, ...}
1690 triggers a bug on most SA-110 based devices, such that the stack
1691 pointer won't be correctly restored if the instruction takes a
1692 page fault. We work around this problem by popping r3 along with
1693 the other registers, since that is never slower than executing
1694 another instruction.
1696 We test for !arm_arch5 here, because code for any architecture
1697 less than this could potentially be run on one of the buggy
1699 if (stack_adjust
== 4 && !arm_arch5
&& TARGET_ARM
)
1701 /* Validate that r3 is a call-clobbered register (always true in
1702 the default abi) ... */
1703 if (!call_used_regs
[3])
1706 /* ... that it isn't being used for a return value ... */
1707 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD
))
1710 /* ... or for a tail-call argument ... */
1713 gcc_assert (GET_CODE (sibling
) == CALL_INSN
);
1715 if (find_regno_fusage (sibling
, USE
, 3))
1719 /* ... and that there are no call-saved registers in r0-r2
1720 (always true in the default ABI). */
1721 if (saved_int_regs
& 0x7)
1725 /* Can't be done if interworking with Thumb, and any registers have been
1727 if (TARGET_INTERWORK
&& saved_int_regs
!= 0 && !IS_INTERRUPT(func_type
))
1730 /* On StrongARM, conditional returns are expensive if they aren't
1731 taken and multiple registers have been stacked. */
1732 if (iscond
&& arm_tune_strongarm
)
1734 /* Conditional return when just the LR is stored is a simple
1735 conditional-load instruction, that's not expensive. */
1736 if (saved_int_regs
!= 0 && saved_int_regs
!= (1 << LR_REGNUM
))
1740 && arm_pic_register
!= INVALID_REGNUM
1741 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
))
1745 /* If there are saved registers but the LR isn't saved, then we need
1746 two instructions for the return. */
1747 if (saved_int_regs
&& !(saved_int_regs
& (1 << LR_REGNUM
)))
1750 /* Can't be done if any of the FPA regs are pushed,
1751 since this also requires an insn. */
1752 if (TARGET_HARD_FLOAT
&& TARGET_FPA
)
1753 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
1754 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1757 /* Likewise VFP regs. */
1758 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
1759 for (regno
= FIRST_VFP_REGNUM
; regno
<= LAST_VFP_REGNUM
; regno
++)
1760 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
/* iWMMXt registers likewise require an explicit restore insn.  */
1763 if (TARGET_REALLY_IWMMXT
)
1764 for (regno
= FIRST_IWMMXT_REGNUM
; regno
<= LAST_IWMMXT_REGNUM
; regno
++)
1765 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
1771 /* Return TRUE if int I is a valid immediate ARM constant. */
1774 const_ok_for_arm (HOST_WIDE_INT i
)
1778 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1779 be all zero, or all one. */
1780 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff) != 0
1781 && ((i
& ~(unsigned HOST_WIDE_INT
) 0xffffffff)
1782 != ((~(unsigned HOST_WIDE_INT
) 0)
1783 & ~(unsigned HOST_WIDE_INT
) 0xffffffff)))
1786 i
&= (unsigned HOST_WIDE_INT
) 0xffffffff;
1788 /* Fast return for 0 and small values. We must do this for zero, since
1789 the code below can't handle that one case. */
1790 if ((i
& ~(unsigned HOST_WIDE_INT
) 0xff) == 0)
1793 /* Get the number of trailing zeros. */
1794 lowbit
= ffs((int) i
) - 1;
1796 /* Only even shifts are allowed in ARM mode so round down to the
1797 nearest even number. */
1801 if ((i
& ~(((unsigned HOST_WIDE_INT
) 0xff) << lowbit
)) == 0)
1806 /* Allow rotated constants in ARM mode. */
1808 && ((i
& ~0xc000003f) == 0
1809 || (i
& ~0xf000000f) == 0
1810 || (i
& ~0xfc000003) == 0))
1817 /* Allow repeated pattern. */
1820 if (i
== v
|| i
== (v
| (v
<< 8)))
1827 /* Return true if I is a valid constant for the operation CODE. */
1829 const_ok_for_op (HOST_WIDE_INT i
, enum rtx_code code
)
1831 if (const_ok_for_arm (i
))
1837 return const_ok_for_arm (ARM_SIGN_EXTEND (-i
));
1839 case MINUS
: /* Should only occur with (MINUS I reg) => rsb */
1845 return const_ok_for_arm (ARM_SIGN_EXTEND (~i
));
1852 /* Emit a sequence of insns to handle a large constant.
1853 CODE is the code of the operation required, it can be any of SET, PLUS,
1854 IOR, AND, XOR, MINUS;
1855 MODE is the mode in which the operation is being performed;
1856 VAL is the integer to operate on;
1857 SOURCE is the other operand (a register, or a null-pointer for SET);
1858 SUBTARGETS means it is safe to create scratch registers if that will
1859 either produce a simpler sequence, or we will want to cse the values.
1860 Return value is the number of insns emitted. */
1862 /* ??? Tweak this for thumb2. */
1864 arm_split_constant (enum rtx_code code
, enum machine_mode mode
, rtx insn
,
1865 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
)
1869 if (insn
&& GET_CODE (PATTERN (insn
)) == COND_EXEC
)
1870 cond
= COND_EXEC_TEST (PATTERN (insn
));
1874 if (subtargets
|| code
== SET
1875 || (GET_CODE (target
) == REG
&& GET_CODE (source
) == REG
1876 && REGNO (target
) != REGNO (source
)))
1878 /* After arm_reorg has been called, we can't fix up expensive
1879 constants by pushing them into memory so we must synthesize
1880 them in-line, regardless of the cost. This is only likely to
1881 be more costly on chips that have load delay slots and we are
1882 compiling without running the scheduler (so no splitting
1883 occurred before the final instruction emission).
1885 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1887 if (!after_arm_reorg
1889 && (arm_gen_constant (code
, mode
, NULL_RTX
, val
, target
, source
,
1891 > arm_constant_limit
+ (code
!= SET
)))
1895 /* Currently SET is the only monadic value for CODE, all
1896 the rest are diadic. */
1897 emit_set_insn (target
, GEN_INT (val
));
1902 rtx temp
= subtargets
? gen_reg_rtx (mode
) : target
;
1904 emit_set_insn (temp
, GEN_INT (val
));
1905 /* For MINUS, the value is subtracted from, since we never
1906 have subtraction of a constant. */
1908 emit_set_insn (target
, gen_rtx_MINUS (mode
, temp
, source
));
1910 emit_set_insn (target
,
1911 gen_rtx_fmt_ee (code
, mode
, source
, temp
));
1917 return arm_gen_constant (code
, mode
, cond
, val
, target
, source
, subtargets
,
1921 /* Return the number of ARM instructions required to synthesize the given
1924 count_insns_for_constant (HOST_WIDE_INT remainder
, int i
)
1926 HOST_WIDE_INT temp1
;
1934 if (remainder
& (3 << (i
- 2)))
1939 temp1
= remainder
& ((0x0ff << end
)
1940 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
1941 remainder
&= ~temp1
;
1946 } while (remainder
);
1950 /* Emit an instruction with the indicated PATTERN. If COND is
1951 non-NULL, conditionalize the execution of the instruction on COND
1955 emit_constant_insn (rtx cond
, rtx pattern
)
1958 pattern
= gen_rtx_COND_EXEC (VOIDmode
, copy_rtx (cond
), pattern
);
1959 emit_insn (pattern
);
1962 /* As above, but extra parameter GENERATE which, if clear, suppresses
1964 /* ??? This needs more work for thumb2. */
1967 arm_gen_constant (enum rtx_code code
, enum machine_mode mode
, rtx cond
,
1968 HOST_WIDE_INT val
, rtx target
, rtx source
, int subtargets
,
1973 int can_negate_initial
= 0;
1976 int num_bits_set
= 0;
1977 int set_sign_bit_copies
= 0;
1978 int clear_sign_bit_copies
= 0;
1979 int clear_zero_bit_copies
= 0;
1980 int set_zero_bit_copies
= 0;
1982 unsigned HOST_WIDE_INT temp1
, temp2
;
1983 unsigned HOST_WIDE_INT remainder
= val
& 0xffffffff;
1985 /* Find out which operations are safe for a given CODE. Also do a quick
1986 check for degenerate cases; these can occur when DImode operations
1998 can_negate_initial
= 1;
2002 if (remainder
== 0xffffffff)
2005 emit_constant_insn (cond
,
2006 gen_rtx_SET (VOIDmode
, target
,
2007 GEN_INT (ARM_SIGN_EXTEND (val
))));
2012 if (reload_completed
&& rtx_equal_p (target
, source
))
2015 emit_constant_insn (cond
,
2016 gen_rtx_SET (VOIDmode
, target
, source
));
2025 emit_constant_insn (cond
,
2026 gen_rtx_SET (VOIDmode
, target
, const0_rtx
));
2029 if (remainder
== 0xffffffff)
2031 if (reload_completed
&& rtx_equal_p (target
, source
))
2034 emit_constant_insn (cond
,
2035 gen_rtx_SET (VOIDmode
, target
, source
));
2044 if (reload_completed
&& rtx_equal_p (target
, source
))
2047 emit_constant_insn (cond
,
2048 gen_rtx_SET (VOIDmode
, target
, source
));
2052 /* We don't know how to handle other cases yet. */
2053 gcc_assert (remainder
== 0xffffffff);
2056 emit_constant_insn (cond
,
2057 gen_rtx_SET (VOIDmode
, target
,
2058 gen_rtx_NOT (mode
, source
)));
2062 /* We treat MINUS as (val - source), since (source - val) is always
2063 passed as (source + (-val)). */
2067 emit_constant_insn (cond
,
2068 gen_rtx_SET (VOIDmode
, target
,
2069 gen_rtx_NEG (mode
, source
)));
2072 if (const_ok_for_arm (val
))
2075 emit_constant_insn (cond
,
2076 gen_rtx_SET (VOIDmode
, target
,
2077 gen_rtx_MINUS (mode
, GEN_INT (val
),
2089 /* If we can do it in one insn get out quickly. */
2090 if (const_ok_for_arm (val
)
2091 || (can_negate_initial
&& const_ok_for_arm (-val
))
2092 || (can_invert
&& const_ok_for_arm (~val
)))
2095 emit_constant_insn (cond
,
2096 gen_rtx_SET (VOIDmode
, target
,
2098 ? gen_rtx_fmt_ee (code
, mode
, source
,
2104 /* Calculate a few attributes that may be useful for specific
2106 for (i
= 31; i
>= 0; i
--)
2108 if ((remainder
& (1 << i
)) == 0)
2109 clear_sign_bit_copies
++;
2114 for (i
= 31; i
>= 0; i
--)
2116 if ((remainder
& (1 << i
)) != 0)
2117 set_sign_bit_copies
++;
2122 for (i
= 0; i
<= 31; i
++)
2124 if ((remainder
& (1 << i
)) == 0)
2125 clear_zero_bit_copies
++;
2130 for (i
= 0; i
<= 31; i
++)
2132 if ((remainder
& (1 << i
)) != 0)
2133 set_zero_bit_copies
++;
2141 /* See if we can use movw. */
2142 if (arm_arch_thumb2
&& (remainder
& 0xffff0000) == 0)
2145 emit_constant_insn (cond
, gen_rtx_SET (VOIDmode
, target
,
2150 /* See if we can do this by sign_extending a constant that is known
2151 to be negative. This is a good, way of doing it, since the shift
2152 may well merge into a subsequent insn. */
2153 if (set_sign_bit_copies
> 1)
2155 if (const_ok_for_arm
2156 (temp1
= ARM_SIGN_EXTEND (remainder
2157 << (set_sign_bit_copies
- 1))))
2161 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2162 emit_constant_insn (cond
,
2163 gen_rtx_SET (VOIDmode
, new_src
,
2165 emit_constant_insn (cond
,
2166 gen_ashrsi3 (target
, new_src
,
2167 GEN_INT (set_sign_bit_copies
- 1)));
2171 /* For an inverted constant, we will need to set the low bits,
2172 these will be shifted out of harm's way. */
2173 temp1
|= (1 << (set_sign_bit_copies
- 1)) - 1;
2174 if (const_ok_for_arm (~temp1
))
2178 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2179 emit_constant_insn (cond
,
2180 gen_rtx_SET (VOIDmode
, new_src
,
2182 emit_constant_insn (cond
,
2183 gen_ashrsi3 (target
, new_src
,
2184 GEN_INT (set_sign_bit_copies
- 1)));
2190 /* See if we can calculate the value as the difference between two
2191 valid immediates. */
2192 if (clear_sign_bit_copies
+ clear_zero_bit_copies
<= 16)
2194 int topshift
= clear_sign_bit_copies
& ~1;
2196 temp1
= ARM_SIGN_EXTEND ((remainder
+ (0x00800000 >> topshift
))
2197 & (0xff000000 >> topshift
));
2199 /* If temp1 is zero, then that means the 9 most significant
2200 bits of remainder were 1 and we've caused it to overflow.
2201 When topshift is 0 we don't need to do anything since we
2202 can borrow from 'bit 32'. */
2203 if (temp1
== 0 && topshift
!= 0)
2204 temp1
= 0x80000000 >> (topshift
- 1);
2206 temp2
= ARM_SIGN_EXTEND (temp1
- remainder
);
2208 if (const_ok_for_arm (temp2
))
2212 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2213 emit_constant_insn (cond
,
2214 gen_rtx_SET (VOIDmode
, new_src
,
2216 emit_constant_insn (cond
,
2217 gen_addsi3 (target
, new_src
,
2225 /* See if we can generate this by setting the bottom (or the top)
2226 16 bits, and then shifting these into the other half of the
2227 word. We only look for the simplest cases, to do more would cost
2228 too much. Be careful, however, not to generate this when the
2229 alternative would take fewer insns. */
2230 if (val
& 0xffff0000)
2232 temp1
= remainder
& 0xffff0000;
2233 temp2
= remainder
& 0x0000ffff;
2235 /* Overlaps outside this range are best done using other methods. */
2236 for (i
= 9; i
< 24; i
++)
2238 if ((((temp2
| (temp2
<< i
)) & 0xffffffff) == remainder
)
2239 && !const_ok_for_arm (temp2
))
2241 rtx new_src
= (subtargets
2242 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2244 insns
= arm_gen_constant (code
, mode
, cond
, temp2
, new_src
,
2245 source
, subtargets
, generate
);
2253 gen_rtx_ASHIFT (mode
, source
,
2260 /* Don't duplicate cases already considered. */
2261 for (i
= 17; i
< 24; i
++)
2263 if (((temp1
| (temp1
>> i
)) == remainder
)
2264 && !const_ok_for_arm (temp1
))
2266 rtx new_src
= (subtargets
2267 ? (generate
? gen_reg_rtx (mode
) : NULL_RTX
)
2269 insns
= arm_gen_constant (code
, mode
, cond
, temp1
, new_src
,
2270 source
, subtargets
, generate
);
2275 gen_rtx_SET (VOIDmode
, target
,
2278 gen_rtx_LSHIFTRT (mode
, source
,
2289 /* If we have IOR or XOR, and the constant can be loaded in a
2290 single instruction, and we can find a temporary to put it in,
2291 then this can be done in two instructions instead of 3-4. */
2293 /* TARGET can't be NULL if SUBTARGETS is 0 */
2294 || (reload_completed
&& !reg_mentioned_p (target
, source
)))
2296 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val
)))
2300 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2302 emit_constant_insn (cond
,
2303 gen_rtx_SET (VOIDmode
, sub
,
2305 emit_constant_insn (cond
,
2306 gen_rtx_SET (VOIDmode
, target
,
2307 gen_rtx_fmt_ee (code
, mode
,
2317 if (set_sign_bit_copies
> 8
2318 && (val
& (-1 << (32 - set_sign_bit_copies
))) == val
)
2322 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2323 rtx shift
= GEN_INT (set_sign_bit_copies
);
2327 gen_rtx_SET (VOIDmode
, sub
,
2329 gen_rtx_ASHIFT (mode
,
2334 gen_rtx_SET (VOIDmode
, target
,
2336 gen_rtx_LSHIFTRT (mode
, sub
,
2342 if (set_zero_bit_copies
> 8
2343 && (remainder
& ((1 << set_zero_bit_copies
) - 1)) == remainder
)
2347 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2348 rtx shift
= GEN_INT (set_zero_bit_copies
);
2352 gen_rtx_SET (VOIDmode
, sub
,
2354 gen_rtx_LSHIFTRT (mode
,
2359 gen_rtx_SET (VOIDmode
, target
,
2361 gen_rtx_ASHIFT (mode
, sub
,
2367 if (const_ok_for_arm (temp1
= ARM_SIGN_EXTEND (~val
)))
2371 rtx sub
= subtargets
? gen_reg_rtx (mode
) : target
;
2372 emit_constant_insn (cond
,
2373 gen_rtx_SET (VOIDmode
, sub
,
2374 gen_rtx_NOT (mode
, source
)));
2377 sub
= gen_reg_rtx (mode
);
2378 emit_constant_insn (cond
,
2379 gen_rtx_SET (VOIDmode
, sub
,
2380 gen_rtx_AND (mode
, source
,
2382 emit_constant_insn (cond
,
2383 gen_rtx_SET (VOIDmode
, target
,
2384 gen_rtx_NOT (mode
, sub
)));
2391 /* See if two shifts will do 2 or more insn's worth of work. */
2392 if (clear_sign_bit_copies
>= 16 && clear_sign_bit_copies
< 24)
2394 HOST_WIDE_INT shift_mask
= ((0xffffffff
2395 << (32 - clear_sign_bit_copies
))
2398 if ((remainder
| shift_mask
) != 0xffffffff)
2402 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2403 insns
= arm_gen_constant (AND
, mode
, cond
,
2404 remainder
| shift_mask
,
2405 new_src
, source
, subtargets
, 1);
2410 rtx targ
= subtargets
? NULL_RTX
: target
;
2411 insns
= arm_gen_constant (AND
, mode
, cond
,
2412 remainder
| shift_mask
,
2413 targ
, source
, subtargets
, 0);
2419 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2420 rtx shift
= GEN_INT (clear_sign_bit_copies
);
2422 emit_insn (gen_ashlsi3 (new_src
, source
, shift
));
2423 emit_insn (gen_lshrsi3 (target
, new_src
, shift
));
2429 if (clear_zero_bit_copies
>= 16 && clear_zero_bit_copies
< 24)
2431 HOST_WIDE_INT shift_mask
= (1 << clear_zero_bit_copies
) - 1;
2433 if ((remainder
| shift_mask
) != 0xffffffff)
2437 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2439 insns
= arm_gen_constant (AND
, mode
, cond
,
2440 remainder
| shift_mask
,
2441 new_src
, source
, subtargets
, 1);
2446 rtx targ
= subtargets
? NULL_RTX
: target
;
2448 insns
= arm_gen_constant (AND
, mode
, cond
,
2449 remainder
| shift_mask
,
2450 targ
, source
, subtargets
, 0);
2456 rtx new_src
= subtargets
? gen_reg_rtx (mode
) : target
;
2457 rtx shift
= GEN_INT (clear_zero_bit_copies
);
2459 emit_insn (gen_lshrsi3 (new_src
, source
, shift
));
2460 emit_insn (gen_ashlsi3 (target
, new_src
, shift
));
2472 for (i
= 0; i
< 32; i
++)
2473 if (remainder
& (1 << i
))
2476 if (code
== AND
|| (can_invert
&& num_bits_set
> 16))
2477 remainder
= (~remainder
) & 0xffffffff;
2478 else if (code
== PLUS
&& num_bits_set
> 16)
2479 remainder
= (-remainder
) & 0xffffffff;
2486 /* Now try and find a way of doing the job in either two or three
2488 We start by looking for the largest block of zeros that are aligned on
2489 a 2-bit boundary, we then fill up the temps, wrapping around to the
2490 top of the word when we drop off the bottom.
2491 In the worst case this code should produce no more than four insns.
2492 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2493 best place to start. */
2495 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2501 int best_consecutive_zeros
= 0;
2503 for (i
= 0; i
< 32; i
+= 2)
2505 int consecutive_zeros
= 0;
2507 if (!(remainder
& (3 << i
)))
2509 while ((i
< 32) && !(remainder
& (3 << i
)))
2511 consecutive_zeros
+= 2;
2514 if (consecutive_zeros
> best_consecutive_zeros
)
2516 best_consecutive_zeros
= consecutive_zeros
;
2517 best_start
= i
- consecutive_zeros
;
2523 /* So long as it won't require any more insns to do so, it's
2524 desirable to emit a small constant (in bits 0...9) in the last
2525 insn. This way there is more chance that it can be combined with
2526 a later addressing insn to form a pre-indexed load or store
2527 operation. Consider:
2529 *((volatile int *)0xe0000100) = 1;
2530 *((volatile int *)0xe0000110) = 2;
2532 We want this to wind up as:
2536 str rB, [rA, #0x100]
2538 str rB, [rA, #0x110]
2540 rather than having to synthesize both large constants from scratch.
2542 Therefore, we calculate how many insns would be required to emit
2543 the constant starting from `best_start', and also starting from
2544 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2545 yield a shorter sequence, we may as well use zero. */
2547 && ((((unsigned HOST_WIDE_INT
) 1) << best_start
) < remainder
)
2548 && (count_insns_for_constant (remainder
, 0) <=
2549 count_insns_for_constant (remainder
, best_start
)))
2553 /* Now start emitting the insns. */
2561 if (remainder
& (3 << (i
- 2)))
2566 temp1
= remainder
& ((0x0ff << end
)
2567 | ((i
< end
) ? (0xff >> (32 - end
)) : 0));
2568 remainder
&= ~temp1
;
2572 rtx new_src
, temp1_rtx
;
2574 if (code
== SET
|| code
== MINUS
)
2576 new_src
= (subtargets
? gen_reg_rtx (mode
) : target
);
2577 if (can_invert
&& code
!= MINUS
)
2582 if (remainder
&& subtargets
)
2583 new_src
= gen_reg_rtx (mode
);
2588 else if (can_negate
)
2592 temp1
= trunc_int_for_mode (temp1
, mode
);
2593 temp1_rtx
= GEN_INT (temp1
);
2597 else if (code
== MINUS
)
2598 temp1_rtx
= gen_rtx_MINUS (mode
, temp1_rtx
, source
);
2600 temp1_rtx
= gen_rtx_fmt_ee (code
, mode
, source
, temp1_rtx
);
2602 emit_constant_insn (cond
,
2603 gen_rtx_SET (VOIDmode
, new_src
,
2613 else if (code
== MINUS
)
2622 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2635 /* Canonicalize a comparison so that we are more likely to recognize it.
2636 This can be done for a few constant compares, where we can make the
2637 immediate value easier to load. */
2640 arm_canonicalize_comparison (enum rtx_code code
, enum machine_mode mode
,
2643 unsigned HOST_WIDE_INT i
= INTVAL (*op1
);
2644 unsigned HOST_WIDE_INT maxval
;
2645 maxval
= (((unsigned HOST_WIDE_INT
) 1) << (GET_MODE_BITSIZE(mode
) - 1)) - 1;
2656 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
2658 *op1
= GEN_INT (i
+ 1);
2659 return code
== GT
? GE
: LT
;
2666 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
2668 *op1
= GEN_INT (i
- 1);
2669 return code
== GE
? GT
: LE
;
2675 if (i
!= ~((unsigned HOST_WIDE_INT
) 0)
2676 && (const_ok_for_arm (i
+ 1) || const_ok_for_arm (-(i
+ 1))))
2678 *op1
= GEN_INT (i
+ 1);
2679 return code
== GTU
? GEU
: LTU
;
2686 && (const_ok_for_arm (i
- 1) || const_ok_for_arm (-(i
- 1))))
2688 *op1
= GEN_INT (i
- 1);
2689 return code
== GEU
? GTU
: LEU
;
2701 /* Define how to find the value returned by a function. */
2704 arm_function_value(const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
2706 enum machine_mode mode
;
2707 int unsignedp ATTRIBUTE_UNUSED
;
2708 rtx r ATTRIBUTE_UNUSED
;
2710 mode
= TYPE_MODE (type
);
2711 /* Promote integer types. */
2712 if (INTEGRAL_TYPE_P (type
))
2713 PROMOTE_FUNCTION_MODE (mode
, unsignedp
, type
);
2715 /* Promotes small structs returned in a register to full-word size
2716 for big-endian AAPCS. */
2717 if (arm_return_in_msb (type
))
2719 HOST_WIDE_INT size
= int_size_in_bytes (type
);
2720 if (size
% UNITS_PER_WORD
!= 0)
2722 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
2723 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
2727 return LIBCALL_VALUE(mode
);
2730 /* Determine the amount of memory needed to store the possible return
2731 registers of an untyped call. */
2733 arm_apply_result_size (void)
2739 if (TARGET_HARD_FLOAT_ABI
)
2743 if (TARGET_MAVERICK
)
2746 if (TARGET_IWMMXT_ABI
)
2753 /* Decide whether a type should be returned in memory (true)
2754 or in a register (false). This is called as the target hook
2755 TARGET_RETURN_IN_MEMORY. */
2757 arm_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
2761 size
= int_size_in_bytes (type
);
2763 /* Vector values should be returned using ARM registers, not memory (unless
2764 they're over 16 bytes, which will break since we only have four
2765 call-clobbered registers to play with). */
2766 if (TREE_CODE (type
) == VECTOR_TYPE
)
2767 return (size
< 0 || size
> (4 * UNITS_PER_WORD
));
2769 if (!AGGREGATE_TYPE_P (type
) &&
2770 !(TARGET_AAPCS_BASED
&& TREE_CODE (type
) == COMPLEX_TYPE
))
2771 /* All simple types are returned in registers.
2772 For AAPCS, complex types are treated the same as aggregates. */
2775 if (arm_abi
!= ARM_ABI_APCS
)
2777 /* ATPCS and later return aggregate types in memory only if they are
2778 larger than a word (or are variable size). */
2779 return (size
< 0 || size
> UNITS_PER_WORD
);
2782 /* For the arm-wince targets we choose to be compatible with Microsoft's
2783 ARM and Thumb compilers, which always return aggregates in memory. */
2785 /* All structures/unions bigger than one word are returned in memory.
2786 Also catch the case where int_size_in_bytes returns -1. In this case
2787 the aggregate is either huge or of variable size, and in either case
2788 we will want to return it via memory and not in a register. */
2789 if (size
< 0 || size
> UNITS_PER_WORD
)
2792 if (TREE_CODE (type
) == RECORD_TYPE
)
2796 /* For a struct the APCS says that we only return in a register
2797 if the type is 'integer like' and every addressable element
2798 has an offset of zero. For practical purposes this means
2799 that the structure can have at most one non bit-field element
2800 and that this element must be the first one in the structure. */
2802 /* Find the first field, ignoring non FIELD_DECL things which will
2803 have been created by C++. */
2804 for (field
= TYPE_FIELDS (type
);
2805 field
&& TREE_CODE (field
) != FIELD_DECL
;
2806 field
= TREE_CHAIN (field
))
2810 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2812 /* Check that the first field is valid for returning in a register. */
2814 /* ... Floats are not allowed */
2815 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
2818 /* ... Aggregates that are not themselves valid for returning in
2819 a register are not allowed. */
2820 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
2823 /* Now check the remaining fields, if any. Only bitfields are allowed,
2824 since they are not addressable. */
2825 for (field
= TREE_CHAIN (field
);
2827 field
= TREE_CHAIN (field
))
2829 if (TREE_CODE (field
) != FIELD_DECL
)
2832 if (!DECL_BIT_FIELD_TYPE (field
))
2839 if (TREE_CODE (type
) == UNION_TYPE
)
2843 /* Unions can be returned in registers if every element is
2844 integral, or can be returned in an integer register. */
2845 for (field
= TYPE_FIELDS (type
);
2847 field
= TREE_CHAIN (field
))
2849 if (TREE_CODE (field
) != FIELD_DECL
)
2852 if (FLOAT_TYPE_P (TREE_TYPE (field
)))
2855 if (arm_return_in_memory (TREE_TYPE (field
), NULL_TREE
))
2861 #endif /* not ARM_WINCE */
2863 /* Return all other types in memory. */
2867 /* Indicate whether or not words of a double are in big-endian order. */
2870 arm_float_words_big_endian (void)
2872 if (TARGET_MAVERICK
)
2875 /* For FPA, float words are always big-endian. For VFP, floats words
2876 follow the memory system mode. */
2884 return (TARGET_BIG_END
? 1 : 0);
2889 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2890 for a call to a function whose data type is FNTYPE.
2891 For a library call, FNTYPE is NULL. */
2893 arm_init_cumulative_args (CUMULATIVE_ARGS
*pcum
, tree fntype
,
2894 rtx libname ATTRIBUTE_UNUSED
,
2895 tree fndecl ATTRIBUTE_UNUSED
)
2897 /* On the ARM, the offset starts at 0. */
2899 pcum
->iwmmxt_nregs
= 0;
2900 pcum
->can_split
= true;
2902 /* Varargs vectors are treated the same as long long.
2903 named_count avoids having to change the way arm handles 'named' */
2904 pcum
->named_count
= 0;
2907 if (TARGET_REALLY_IWMMXT
&& fntype
)
2911 for (fn_arg
= TYPE_ARG_TYPES (fntype
);
2913 fn_arg
= TREE_CHAIN (fn_arg
))
2914 pcum
->named_count
+= 1;
2916 if (! pcum
->named_count
)
2917 pcum
->named_count
= INT_MAX
;
2922 /* Return true if mode/type need doubleword alignment. */
2924 arm_needs_doubleword_align (enum machine_mode mode
, tree type
)
2926 return (GET_MODE_ALIGNMENT (mode
) > PARM_BOUNDARY
2927 || (type
&& TYPE_ALIGN (type
) > PARM_BOUNDARY
));
2931 /* Determine where to put an argument to a function.
2932 Value is zero to push the argument on the stack,
2933 or a hard register in which to store the argument.
2935 MODE is the argument's machine mode.
2936 TYPE is the data type of the argument (as a tree).
2937 This is null for libcalls where that information may
2939 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2940 the preceding args and about the function being called.
2941 NAMED is nonzero if this argument is a named parameter
2942 (otherwise it is an extra parameter matching an ellipsis). */
2945 arm_function_arg (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
2946 tree type
, int named
)
2950 /* Varargs vectors are treated the same as long long.
2951 named_count avoids having to change the way arm handles 'named' */
2952 if (TARGET_IWMMXT_ABI
2953 && arm_vector_mode_supported_p (mode
)
2954 && pcum
->named_count
> pcum
->nargs
+ 1)
2956 if (pcum
->iwmmxt_nregs
<= 9)
2957 return gen_rtx_REG (mode
, pcum
->iwmmxt_nregs
+ FIRST_IWMMXT_REGNUM
);
2960 pcum
->can_split
= false;
2965 /* Put doubleword aligned quantities in even register pairs. */
2967 && ARM_DOUBLEWORD_ALIGN
2968 && arm_needs_doubleword_align (mode
, type
))
2971 if (mode
== VOIDmode
)
2972 /* Pick an arbitrary value for operand 2 of the call insn. */
2975 /* Only allow splitting an arg between regs and memory if all preceding
2976 args were allocated to regs. For args passed by reference we only count
2977 the reference pointer. */
2978 if (pcum
->can_split
)
2981 nregs
= ARM_NUM_REGS2 (mode
, type
);
2983 if (!named
|| pcum
->nregs
+ nregs
> NUM_ARG_REGS
)
2986 return gen_rtx_REG (mode
, pcum
->nregs
);
2990 arm_arg_partial_bytes (CUMULATIVE_ARGS
*pcum
, enum machine_mode mode
,
2991 tree type
, bool named ATTRIBUTE_UNUSED
)
2993 int nregs
= pcum
->nregs
;
2995 if (TARGET_IWMMXT_ABI
&& arm_vector_mode_supported_p (mode
))
2998 if (NUM_ARG_REGS
> nregs
2999 && (NUM_ARG_REGS
< nregs
+ ARM_NUM_REGS2 (mode
, type
))
3001 return (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
3006 /* Variable sized types are passed by reference. This is a GCC
3007 extension to the ARM ABI. */
3010 arm_pass_by_reference (CUMULATIVE_ARGS
*cum ATTRIBUTE_UNUSED
,
3011 enum machine_mode mode ATTRIBUTE_UNUSED
,
3012 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3014 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3017 /* Encode the current state of the #pragma [no_]long_calls. */
3020 OFF
, /* No #pragma [no_]long_calls is in effect. */
3021 LONG
, /* #pragma long_calls is in effect. */
3022 SHORT
/* #pragma no_long_calls is in effect. */
3025 static arm_pragma_enum arm_pragma_long_calls
= OFF
;
3028 arm_pr_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
3030 arm_pragma_long_calls
= LONG
;
3034 arm_pr_no_long_calls (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
3036 arm_pragma_long_calls
= SHORT
;
3040 arm_pr_long_calls_off (struct cpp_reader
* pfile ATTRIBUTE_UNUSED
)
3042 arm_pragma_long_calls
= OFF
;
3045 /* Table of machine attributes. */
3046 const struct attribute_spec arm_attribute_table
[] =
3048 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3049 /* Function calls made to this symbol must be done indirectly, because
3050 it may lie outside of the 26 bit addressing range of a normal function
3052 { "long_call", 0, 0, false, true, true, NULL
},
3053 /* Whereas these functions are always known to reside within the 26 bit
3054 addressing range. */
3055 { "short_call", 0, 0, false, true, true, NULL
},
3056 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3057 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute
},
3058 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute
},
3059 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute
},
3061 /* ARM/PE has three new attributes:
3063 dllexport - for exporting a function/variable that will live in a dll
3064 dllimport - for importing a function/variable from a dll
3066 Microsoft allows multiple declspecs in one __declspec, separating
3067 them with spaces. We do NOT support this. Instead, use __declspec
3070 { "dllimport", 0, 0, true, false, false, NULL
},
3071 { "dllexport", 0, 0, true, false, false, NULL
},
3072 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute
},
3073 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3074 { "dllimport", 0, 0, false, false, false, handle_dll_attribute
},
3075 { "dllexport", 0, 0, false, false, false, handle_dll_attribute
},
3076 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute
},
3078 { NULL
, 0, 0, false, false, false, NULL
}
3081 /* Handle an attribute requiring a FUNCTION_DECL;
3082 arguments as in struct attribute_spec.handler. */
3084 arm_handle_fndecl_attribute (tree
*node
, tree name
, tree args ATTRIBUTE_UNUSED
,
3085 int flags ATTRIBUTE_UNUSED
, bool *no_add_attrs
)
3087 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3089 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
3090 IDENTIFIER_POINTER (name
));
3091 *no_add_attrs
= true;
3097 /* Handle an "interrupt" or "isr" attribute;
3098 arguments as in struct attribute_spec.handler. */
3100 arm_handle_isr_attribute (tree
*node
, tree name
, tree args
, int flags
,
3105 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3107 warning (OPT_Wattributes
, "%qs attribute only applies to functions",
3108 IDENTIFIER_POINTER (name
));
3109 *no_add_attrs
= true;
3111 /* FIXME: the argument if any is checked for type attributes;
3112 should it be checked for decl ones? */
3116 if (TREE_CODE (*node
) == FUNCTION_TYPE
3117 || TREE_CODE (*node
) == METHOD_TYPE
)
3119 if (arm_isr_value (args
) == ARM_FT_UNKNOWN
)
3121 warning (OPT_Wattributes
, "%qs attribute ignored",
3122 IDENTIFIER_POINTER (name
));
3123 *no_add_attrs
= true;
3126 else if (TREE_CODE (*node
) == POINTER_TYPE
3127 && (TREE_CODE (TREE_TYPE (*node
)) == FUNCTION_TYPE
3128 || TREE_CODE (TREE_TYPE (*node
)) == METHOD_TYPE
)
3129 && arm_isr_value (args
) != ARM_FT_UNKNOWN
)
3131 *node
= build_variant_type_copy (*node
);
3132 TREE_TYPE (*node
) = build_type_attribute_variant
3134 tree_cons (name
, args
, TYPE_ATTRIBUTES (TREE_TYPE (*node
))));
3135 *no_add_attrs
= true;
3139 /* Possibly pass this attribute on from the type to a decl. */
3140 if (flags
& ((int) ATTR_FLAG_DECL_NEXT
3141 | (int) ATTR_FLAG_FUNCTION_NEXT
3142 | (int) ATTR_FLAG_ARRAY_NEXT
))
3144 *no_add_attrs
= true;
3145 return tree_cons (name
, args
, NULL_TREE
);
3149 warning (OPT_Wattributes
, "%qs attribute ignored",
3150 IDENTIFIER_POINTER (name
));
3158 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3159 /* Handle the "notshared" attribute. This attribute is another way of
3160 requesting hidden visibility. ARM's compiler supports
3161 "__declspec(notshared)"; we support the same thing via an
3165 arm_handle_notshared_attribute (tree
*node
,
3166 tree name ATTRIBUTE_UNUSED
,
3167 tree args ATTRIBUTE_UNUSED
,
3168 int flags ATTRIBUTE_UNUSED
,
3171 tree decl
= TYPE_NAME (*node
);
3175 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
3176 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
3177 *no_add_attrs
= false;
3183 /* Return 0 if the attributes for two types are incompatible, 1 if they
3184 are compatible, and 2 if they are nearly compatible (which causes a
3185 warning to be generated). */
3187 arm_comp_type_attributes (const_tree type1
, const_tree type2
)
3191 /* Check for mismatch of non-default calling convention. */
3192 if (TREE_CODE (type1
) != FUNCTION_TYPE
)
3195 /* Check for mismatched call attributes. */
3196 l1
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
3197 l2
= lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
3198 s1
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1
)) != NULL
;
3199 s2
= lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2
)) != NULL
;
3201 /* Only bother to check if an attribute is defined. */
3202 if (l1
| l2
| s1
| s2
)
3204 /* If one type has an attribute, the other must have the same attribute. */
3205 if ((l1
!= l2
) || (s1
!= s2
))
3208 /* Disallow mixed attributes. */
3209 if ((l1
& s2
) || (l2
& s1
))
3213 /* Check for mismatched ISR attribute. */
3214 l1
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type1
)) != NULL
;
3216 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1
)) != NULL
;
3217 l2
= lookup_attribute ("isr", TYPE_ATTRIBUTES (type2
)) != NULL
;
3219 l1
= lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2
)) != NULL
;
3226 /* Assigns default attributes to newly defined type. This is used to
3227 set short_call/long_call attributes for function types of
3228 functions defined inside corresponding #pragma scopes. */
3230 arm_set_default_type_attributes (tree type
)
3232 /* Add __attribute__ ((long_call)) to all functions, when
3233 inside #pragma long_calls or __attribute__ ((short_call)),
3234 when inside #pragma no_long_calls. */
3235 if (TREE_CODE (type
) == FUNCTION_TYPE
|| TREE_CODE (type
) == METHOD_TYPE
)
3237 tree type_attr_list
, attr_name
;
3238 type_attr_list
= TYPE_ATTRIBUTES (type
);
3240 if (arm_pragma_long_calls
== LONG
)
3241 attr_name
= get_identifier ("long_call");
3242 else if (arm_pragma_long_calls
== SHORT
)
3243 attr_name
= get_identifier ("short_call");
3247 type_attr_list
= tree_cons (attr_name
, NULL_TREE
, type_attr_list
);
3248 TYPE_ATTRIBUTES (type
) = type_attr_list
;
3252 /* Return true if DECL is known to be linked into section SECTION. */
3255 arm_function_in_section_p (tree decl
, section
*section
)
3257 /* We can only be certain about functions defined in the same
3258 compilation unit. */
3259 if (!TREE_STATIC (decl
))
3262 /* Make sure that SYMBOL always binds to the definition in this
3263 compilation unit. */
3264 if (!targetm
.binds_local_p (decl
))
3267 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3268 if (!DECL_SECTION_NAME (decl
))
3270 /* Only cater for unit-at-a-time mode, where we know that the user
3271 cannot later specify a section for DECL. */
3272 if (!flag_unit_at_a_time
)
3275 /* Make sure that we will not create a unique section for DECL. */
3276 if (flag_function_sections
|| DECL_ONE_ONLY (decl
))
3280 return function_section (decl
) == section
;
3283 /* Return nonzero if a 32-bit "long_call" should be generated for
3284 a call from the current function to DECL. We generate a long_call
3287 a. has an __attribute__((long call))
3288 or b. is within the scope of a #pragma long_calls
3289 or c. the -mlong-calls command line switch has been specified
3291 However we do not generate a long call if the function:
3293 d. has an __attribute__ ((short_call))
3294 or e. is inside the scope of a #pragma no_long_calls
3295 or f. is defined in the same section as the current function. */
3298 arm_is_long_call_p (tree decl
)
3303 return TARGET_LONG_CALLS
;
3305 attrs
= TYPE_ATTRIBUTES (TREE_TYPE (decl
));
3306 if (lookup_attribute ("short_call", attrs
))
3309 /* For "f", be conservative, and only cater for cases in which the
3310 whole of the current function is placed in the same section. */
3311 if (!flag_reorder_blocks_and_partition
3312 && arm_function_in_section_p (decl
, current_function_section ()))
3315 if (lookup_attribute ("long_call", attrs
))
3318 return TARGET_LONG_CALLS
;
3321 /* Return nonzero if it is ok to make a tail-call to DECL. */
3323 arm_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
3325 unsigned long func_type
;
3327 if (cfun
->machine
->sibcall_blocked
)
3330 /* Never tailcall something for which we have no decl, or if we
3331 are in Thumb mode. */
3332 if (decl
== NULL
|| TARGET_THUMB
)
3335 /* The PIC register is live on entry to VxWorks PLT entries, so we
3336 must make the call before restoring the PIC register. */
3337 if (TARGET_VXWORKS_RTP
&& flag_pic
&& !targetm
.binds_local_p (decl
))
3340 /* Cannot tail-call to long calls, since these are out of range of
3341 a branch instruction. */
3342 if (arm_is_long_call_p (decl
))
3345 /* If we are interworking and the function is not declared static
3346 then we can't tail-call it unless we know that it exists in this
3347 compilation unit (since it might be a Thumb routine). */
3348 if (TARGET_INTERWORK
&& TREE_PUBLIC (decl
) && !TREE_ASM_WRITTEN (decl
))
3351 func_type
= arm_current_func_type ();
3352 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3353 if (IS_INTERRUPT (func_type
))
3356 /* Never tailcall if function may be called with a misaligned SP. */
3357 if (IS_STACKALIGN (func_type
))
3360 /* Everything else is ok. */
3365 /* Addressing mode support functions. */
3367 /* Return nonzero if X is a legitimate immediate operand when compiling
3368 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3370 legitimate_pic_operand_p (rtx x
)
3372 if (GET_CODE (x
) == SYMBOL_REF
3373 || (GET_CODE (x
) == CONST
3374 && GET_CODE (XEXP (x
, 0)) == PLUS
3375 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
3381 /* Record that the current function needs a PIC register. Initialize
3382 cfun->machine->pic_reg if we have not already done so. */
3385 require_pic_register (void)
3387 /* A lot of the logic here is made obscure by the fact that this
3388 routine gets called as part of the rtx cost estimation process.
3389 We don't want those calls to affect any assumptions about the real
3390 function; and further, we can't call entry_of_function() until we
3391 start the real expansion process. */
3392 if (!crtl
->uses_pic_offset_table
)
3394 gcc_assert (can_create_pseudo_p ());
3395 if (arm_pic_register
!= INVALID_REGNUM
)
3397 cfun
->machine
->pic_reg
= gen_rtx_REG (Pmode
, arm_pic_register
);
3399 /* Play games to avoid marking the function as needing pic
3400 if we are being called as part of the cost-estimation
3402 if (current_ir_type () != IR_GIMPLE
)
3403 crtl
->uses_pic_offset_table
= 1;
3409 cfun
->machine
->pic_reg
= gen_reg_rtx (Pmode
);
3411 /* Play games to avoid marking the function as needing pic
3412 if we are being called as part of the cost-estimation
3414 if (current_ir_type () != IR_GIMPLE
)
3416 crtl
->uses_pic_offset_table
= 1;
3419 arm_load_pic_register (0UL);
3423 emit_insn_after (seq
, entry_of_function ());
3430 legitimize_pic_address (rtx orig
, enum machine_mode mode
, rtx reg
)
3432 if (GET_CODE (orig
) == SYMBOL_REF
3433 || GET_CODE (orig
) == LABEL_REF
)
3435 rtx pic_ref
, address
;
3439 /* If this function doesn't have a pic register, create one now. */
3440 require_pic_register ();
3444 gcc_assert (can_create_pseudo_p ());
3445 reg
= gen_reg_rtx (Pmode
);
3451 address
= gen_reg_rtx (Pmode
);
3456 emit_insn (gen_pic_load_addr_arm (address
, orig
));
3457 else if (TARGET_THUMB2
)
3458 emit_insn (gen_pic_load_addr_thumb2 (address
, orig
));
3459 else /* TARGET_THUMB1 */
3460 emit_insn (gen_pic_load_addr_thumb1 (address
, orig
));
3462 /* VxWorks does not impose a fixed gap between segments; the run-time
3463 gap can be different from the object-file gap. We therefore can't
3464 use GOTOFF unless we are absolutely sure that the symbol is in the
3465 same segment as the GOT. Unfortunately, the flexibility of linker
3466 scripts means that we can't be sure of that in general, so assume
3467 that GOTOFF is never valid on VxWorks. */
3468 if ((GET_CODE (orig
) == LABEL_REF
3469 || (GET_CODE (orig
) == SYMBOL_REF
&&
3470 SYMBOL_REF_LOCAL_P (orig
)))
3472 && !TARGET_VXWORKS_RTP
)
3473 pic_ref
= gen_rtx_PLUS (Pmode
, cfun
->machine
->pic_reg
, address
);
3476 pic_ref
= gen_const_mem (Pmode
,
3477 gen_rtx_PLUS (Pmode
, cfun
->machine
->pic_reg
,
3481 insn
= emit_move_insn (reg
, pic_ref
);
3483 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3485 set_unique_reg_note (insn
, REG_EQUAL
, orig
);
3489 else if (GET_CODE (orig
) == CONST
)
3493 if (GET_CODE (XEXP (orig
, 0)) == PLUS
3494 && XEXP (XEXP (orig
, 0), 0) == cfun
->machine
->pic_reg
)
3497 if (GET_CODE (XEXP (orig
, 0)) == UNSPEC
3498 && XINT (XEXP (orig
, 0), 1) == UNSPEC_TLS
)
3503 gcc_assert (can_create_pseudo_p ());
3504 reg
= gen_reg_rtx (Pmode
);
3507 gcc_assert (GET_CODE (XEXP (orig
, 0)) == PLUS
);
3509 base
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 0), Pmode
, reg
);
3510 offset
= legitimize_pic_address (XEXP (XEXP (orig
, 0), 1), Pmode
,
3511 base
== reg
? 0 : reg
);
3513 if (GET_CODE (offset
) == CONST_INT
)
3515 /* The base register doesn't really matter, we only want to
3516 test the index for the appropriate mode. */
3517 if (!arm_legitimate_index_p (mode
, offset
, SET
, 0))
3519 gcc_assert (can_create_pseudo_p ());
3520 offset
= force_reg (Pmode
, offset
);
3523 if (GET_CODE (offset
) == CONST_INT
)
3524 return plus_constant (base
, INTVAL (offset
));
3527 if (GET_MODE_SIZE (mode
) > 4
3528 && (GET_MODE_CLASS (mode
) == MODE_INT
3529 || TARGET_SOFT_FLOAT
))
3531 emit_insn (gen_addsi3 (reg
, base
, offset
));
3535 return gen_rtx_PLUS (Pmode
, base
, offset
);
3542 /* Find a spare register to use during the prolog of a function. */
3545 thumb_find_work_register (unsigned long pushed_regs_mask
)
3549 /* Check the argument registers first as these are call-used. The
3550 register allocation order means that sometimes r3 might be used
3551 but earlier argument registers might not, so check them all. */
3552 for (reg
= LAST_ARG_REGNUM
; reg
>= 0; reg
--)
3553 if (!df_regs_ever_live_p (reg
))
3556 /* Before going on to check the call-saved registers we can try a couple
3557 more ways of deducing that r3 is available. The first is when we are
3558 pushing anonymous arguments onto the stack and we have less than 4
3559 registers worth of fixed arguments(*). In this case r3 will be part of
3560 the variable argument list and so we can be sure that it will be
3561 pushed right at the start of the function. Hence it will be available
3562 for the rest of the prologue.
3563 (*): ie crtl->args.pretend_args_size is greater than 0. */
3564 if (cfun
->machine
->uses_anonymous_args
3565 && crtl
->args
.pretend_args_size
> 0)
3566 return LAST_ARG_REGNUM
;
3568 /* The other case is when we have fixed arguments but less than 4 registers
3569 worth. In this case r3 might be used in the body of the function, but
3570 it is not being used to convey an argument into the function. In theory
3571 we could just check crtl->args.size to see how many bytes are
3572 being passed in argument registers, but it seems that it is unreliable.
3573 Sometimes it will have the value 0 when in fact arguments are being
3574 passed. (See testcase execute/20021111-1.c for an example). So we also
3575 check the args_info.nregs field as well. The problem with this field is
3576 that it makes no allowances for arguments that are passed to the
3577 function but which are not used. Hence we could miss an opportunity
3578 when a function has an unused argument in r3. But it is better to be
3579 safe than to be sorry. */
3580 if (! cfun
->machine
->uses_anonymous_args
3581 && crtl
->args
.size
>= 0
3582 && crtl
->args
.size
<= (LAST_ARG_REGNUM
* UNITS_PER_WORD
)
3583 && crtl
->args
.info
.nregs
< 4)
3584 return LAST_ARG_REGNUM
;
3586 /* Otherwise look for a call-saved register that is going to be pushed. */
3587 for (reg
= LAST_LO_REGNUM
; reg
> LAST_ARG_REGNUM
; reg
--)
3588 if (pushed_regs_mask
& (1 << reg
))
3593 /* Thumb-2 can use high regs. */
3594 for (reg
= FIRST_HI_REGNUM
; reg
< 15; reg
++)
3595 if (pushed_regs_mask
& (1 << reg
))
3598 /* Something went wrong - thumb_compute_save_reg_mask()
3599 should have arranged for a suitable register to be pushed. */
3603 static GTY(()) int pic_labelno
;
3605 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3609 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED
)
3611 rtx l1
, labelno
, pic_tmp
, pic_tmp2
, pic_rtx
, pic_reg
;
3612 rtx global_offset_table
;
3614 if (crtl
->uses_pic_offset_table
== 0 || TARGET_SINGLE_PIC_BASE
)
3617 gcc_assert (flag_pic
);
3619 pic_reg
= cfun
->machine
->pic_reg
;
3620 if (TARGET_VXWORKS_RTP
)
3622 pic_rtx
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_BASE
);
3623 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
3624 emit_insn (gen_pic_load_addr_arm (pic_reg
, pic_rtx
));
3626 emit_insn (gen_rtx_SET (Pmode
, pic_reg
, gen_rtx_MEM (Pmode
, pic_reg
)));
3628 pic_tmp
= gen_rtx_SYMBOL_REF (Pmode
, VXWORKS_GOTT_INDEX
);
3629 emit_insn (gen_pic_offset_arm (pic_reg
, pic_reg
, pic_tmp
));
3633 /* We use an UNSPEC rather than a LABEL_REF because this label
3634 never appears in the code stream. */
3636 labelno
= GEN_INT (pic_labelno
++);
3637 l1
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
3638 l1
= gen_rtx_CONST (VOIDmode
, l1
);
3641 = gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
3642 /* On the ARM the PC register contains 'dot + 8' at the time of the
3643 addition, on the Thumb it is 'dot + 4'. */
3644 pic_tmp
= plus_constant (l1
, TARGET_ARM
? 8 : 4);
3647 pic_tmp2
= gen_rtx_PLUS (Pmode
, global_offset_table
, pc_rtx
);
3648 pic_tmp2
= gen_rtx_CONST (VOIDmode
, pic_tmp2
);
3651 pic_tmp2
= gen_rtx_CONST (VOIDmode
, global_offset_table
);
3653 pic_rtx
= gen_rtx_MINUS (Pmode
, pic_tmp2
, pic_tmp
);
3654 pic_rtx
= gen_rtx_CONST (Pmode
, pic_rtx
);
3658 emit_insn (gen_pic_load_addr_arm (pic_reg
, pic_rtx
));
3659 emit_insn (gen_pic_add_dot_plus_eight (pic_reg
, pic_reg
, labelno
));
3661 else if (TARGET_THUMB2
)
3663 /* Thumb-2 only allows very limited access to the PC. Calculate the
3664 address in a temporary register. */
3665 if (arm_pic_register
!= INVALID_REGNUM
)
3667 pic_tmp
= gen_rtx_REG (SImode
,
3668 thumb_find_work_register (saved_regs
));
3672 gcc_assert (can_create_pseudo_p ());
3673 pic_tmp
= gen_reg_rtx (Pmode
);
3676 emit_insn (gen_pic_load_addr_thumb2 (pic_reg
, pic_rtx
));
3677 emit_insn (gen_pic_load_dot_plus_four (pic_tmp
, labelno
));
3678 emit_insn (gen_addsi3 (pic_reg
, pic_reg
, pic_tmp
));
3680 else /* TARGET_THUMB1 */
3682 if (arm_pic_register
!= INVALID_REGNUM
3683 && REGNO (pic_reg
) > LAST_LO_REGNUM
)
3685 /* We will have pushed the pic register, so we should always be
3686 able to find a work register. */
3687 pic_tmp
= gen_rtx_REG (SImode
,
3688 thumb_find_work_register (saved_regs
));
3689 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp
, pic_rtx
));
3690 emit_insn (gen_movsi (pic_offset_table_rtx
, pic_tmp
));
3693 emit_insn (gen_pic_load_addr_thumb1 (pic_reg
, pic_rtx
));
3694 emit_insn (gen_pic_add_dot_plus_four (pic_reg
, pic_reg
, labelno
));
3698 /* Need to emit this whether or not we obey regdecls,
3699 since setjmp/longjmp can cause life info to screw up. */
3704 /* Return nonzero if X is valid as an ARM state addressing register. */
3706 arm_address_register_rtx_p (rtx x
, int strict_p
)
3710 if (GET_CODE (x
) != REG
)
3716 return ARM_REGNO_OK_FOR_BASE_P (regno
);
3718 return (regno
<= LAST_ARM_REGNUM
3719 || regno
>= FIRST_PSEUDO_REGISTER
3720 || regno
== FRAME_POINTER_REGNUM
3721 || regno
== ARG_POINTER_REGNUM
);
3724 /* Return TRUE if this rtx is the difference of a symbol and a label,
3725 and will reduce to a PC-relative relocation in the object file.
3726 Expressions like this can be left alone when generating PIC, rather
3727 than forced through the GOT. */
3729 pcrel_constant_p (rtx x
)
3731 if (GET_CODE (x
) == MINUS
)
3732 return symbol_mentioned_p (XEXP (x
, 0)) && label_mentioned_p (XEXP (x
, 1));
3737 /* Return nonzero if X is a valid ARM state address operand. */
3739 arm_legitimate_address_p (enum machine_mode mode
, rtx x
, RTX_CODE outer
,
3743 enum rtx_code code
= GET_CODE (x
);
3745 if (arm_address_register_rtx_p (x
, strict_p
))
3748 use_ldrd
= (TARGET_LDRD
3750 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
3752 if (code
== POST_INC
|| code
== PRE_DEC
3753 || ((code
== PRE_INC
|| code
== POST_DEC
)
3754 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
3755 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
3757 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
3758 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
3759 && GET_CODE (XEXP (x
, 1)) == PLUS
3760 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
3762 rtx addend
= XEXP (XEXP (x
, 1), 1);
3764 /* Don't allow ldrd post increment by register because it's hard
3765 to fixup invalid register choices. */
3767 && GET_CODE (x
) == POST_MODIFY
3768 && GET_CODE (addend
) == REG
)
3771 return ((use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)
3772 && arm_legitimate_index_p (mode
, addend
, outer
, strict_p
));
3775 /* After reload constants split into minipools will have addresses
3776 from a LABEL_REF. */
3777 else if (reload_completed
3778 && (code
== LABEL_REF
3780 && GET_CODE (XEXP (x
, 0)) == PLUS
3781 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
3782 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
3785 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
3788 else if (code
== PLUS
)
3790 rtx xop0
= XEXP (x
, 0);
3791 rtx xop1
= XEXP (x
, 1);
3793 return ((arm_address_register_rtx_p (xop0
, strict_p
)
3794 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
))
3795 || (arm_address_register_rtx_p (xop1
, strict_p
)
3796 && arm_legitimate_index_p (mode
, xop0
, outer
, strict_p
)));
3800 /* Reload currently can't handle MINUS, so disable this for now */
3801 else if (GET_CODE (x
) == MINUS
)
3803 rtx xop0
= XEXP (x
, 0);
3804 rtx xop1
= XEXP (x
, 1);
3806 return (arm_address_register_rtx_p (xop0
, strict_p
)
3807 && arm_legitimate_index_p (mode
, xop1
, outer
, strict_p
));
3811 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
3812 && code
== SYMBOL_REF
3813 && CONSTANT_POOL_ADDRESS_P (x
)
3815 && symbol_mentioned_p (get_pool_constant (x
))
3816 && ! pcrel_constant_p (get_pool_constant (x
))))
3822 /* Return nonzero if X is a valid Thumb-2 address operand. */
3824 thumb2_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
3827 enum rtx_code code
= GET_CODE (x
);
3829 if (arm_address_register_rtx_p (x
, strict_p
))
3832 use_ldrd
= (TARGET_LDRD
3834 || (mode
== DFmode
&& (TARGET_SOFT_FLOAT
|| TARGET_VFP
))));
3836 if (code
== POST_INC
|| code
== PRE_DEC
3837 || ((code
== PRE_INC
|| code
== POST_DEC
)
3838 && (use_ldrd
|| GET_MODE_SIZE (mode
) <= 4)))
3839 return arm_address_register_rtx_p (XEXP (x
, 0), strict_p
);
3841 else if ((code
== POST_MODIFY
|| code
== PRE_MODIFY
)
3842 && arm_address_register_rtx_p (XEXP (x
, 0), strict_p
)
3843 && GET_CODE (XEXP (x
, 1)) == PLUS
3844 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), XEXP (x
, 0)))
3846 /* Thumb-2 only has autoincrement by constant. */
3847 rtx addend
= XEXP (XEXP (x
, 1), 1);
3848 HOST_WIDE_INT offset
;
3850 if (GET_CODE (addend
) != CONST_INT
)
3853 offset
= INTVAL(addend
);
3854 if (GET_MODE_SIZE (mode
) <= 4)
3855 return (offset
> -256 && offset
< 256);
3857 return (use_ldrd
&& offset
> -1024 && offset
< 1024
3858 && (offset
& 3) == 0);
3861 /* After reload constants split into minipools will have addresses
3862 from a LABEL_REF. */
3863 else if (reload_completed
3864 && (code
== LABEL_REF
3866 && GET_CODE (XEXP (x
, 0)) == PLUS
3867 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
3868 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
3871 else if (mode
== TImode
|| (TARGET_NEON
&& VALID_NEON_STRUCT_MODE (mode
)))
3874 else if (code
== PLUS
)
3876 rtx xop0
= XEXP (x
, 0);
3877 rtx xop1
= XEXP (x
, 1);
3879 return ((arm_address_register_rtx_p (xop0
, strict_p
)
3880 && thumb2_legitimate_index_p (mode
, xop1
, strict_p
))
3881 || (arm_address_register_rtx_p (xop1
, strict_p
)
3882 && thumb2_legitimate_index_p (mode
, xop0
, strict_p
)));
3885 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
3886 && code
== SYMBOL_REF
3887 && CONSTANT_POOL_ADDRESS_P (x
)
3889 && symbol_mentioned_p (get_pool_constant (x
))
3890 && ! pcrel_constant_p (get_pool_constant (x
))))
3896 /* Return nonzero if INDEX is valid for an address index operand in
3899 arm_legitimate_index_p (enum machine_mode mode
, rtx index
, RTX_CODE outer
,
3902 HOST_WIDE_INT range
;
3903 enum rtx_code code
= GET_CODE (index
);
3905 /* Standard coprocessor addressing modes. */
3906 if (TARGET_HARD_FLOAT
3907 && (TARGET_FPA
|| TARGET_MAVERICK
)
3908 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
3909 || (TARGET_MAVERICK
&& mode
== DImode
)))
3910 return (code
== CONST_INT
&& INTVAL (index
) < 1024
3911 && INTVAL (index
) > -1024
3912 && (INTVAL (index
) & 3) == 0);
3915 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
3916 return (code
== CONST_INT
3917 && INTVAL (index
) < 1016
3918 && INTVAL (index
) > -1024
3919 && (INTVAL (index
) & 3) == 0);
3921 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
3922 return (code
== CONST_INT
3923 && INTVAL (index
) < 1024
3924 && INTVAL (index
) > -1024
3925 && (INTVAL (index
) & 3) == 0);
3927 if (arm_address_register_rtx_p (index
, strict_p
)
3928 && (GET_MODE_SIZE (mode
) <= 4))
3931 if (mode
== DImode
|| mode
== DFmode
)
3933 if (code
== CONST_INT
)
3935 HOST_WIDE_INT val
= INTVAL (index
);
3938 return val
> -256 && val
< 256;
3940 return val
> -4096 && val
< 4092;
3943 return TARGET_LDRD
&& arm_address_register_rtx_p (index
, strict_p
);
3946 if (GET_MODE_SIZE (mode
) <= 4
3949 || (mode
== QImode
&& outer
== SIGN_EXTEND
))))
3953 rtx xiop0
= XEXP (index
, 0);
3954 rtx xiop1
= XEXP (index
, 1);
3956 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
3957 && power_of_two_operand (xiop1
, SImode
))
3958 || (arm_address_register_rtx_p (xiop1
, strict_p
)
3959 && power_of_two_operand (xiop0
, SImode
)));
3961 else if (code
== LSHIFTRT
|| code
== ASHIFTRT
3962 || code
== ASHIFT
|| code
== ROTATERT
)
3964 rtx op
= XEXP (index
, 1);
3966 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
3967 && GET_CODE (op
) == CONST_INT
3969 && INTVAL (op
) <= 31);
3973 /* For ARM v4 we may be doing a sign-extend operation during the
3977 if (mode
== HImode
|| (outer
== SIGN_EXTEND
&& mode
== QImode
))
3983 range
= (mode
== HImode
) ? 4095 : 4096;
3985 return (code
== CONST_INT
3986 && INTVAL (index
) < range
3987 && INTVAL (index
) > -range
);
3990 /* Return true if OP is a valid index scaling factor for Thumb-2 address
3991 index operand. i.e. 1, 2, 4 or 8. */
3993 thumb2_index_mul_operand (rtx op
)
3997 if (GET_CODE(op
) != CONST_INT
)
4001 return (val
== 1 || val
== 2 || val
== 4 || val
== 8);
4004 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4006 thumb2_legitimate_index_p (enum machine_mode mode
, rtx index
, int strict_p
)
4008 enum rtx_code code
= GET_CODE (index
);
4010 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4011 /* Standard coprocessor addressing modes. */
4012 if (TARGET_HARD_FLOAT
4013 && (TARGET_FPA
|| TARGET_MAVERICK
)
4014 && (GET_MODE_CLASS (mode
) == MODE_FLOAT
4015 || (TARGET_MAVERICK
&& mode
== DImode
)))
4016 return (code
== CONST_INT
&& INTVAL (index
) < 1024
4017 && INTVAL (index
) > -1024
4018 && (INTVAL (index
) & 3) == 0);
4020 if (TARGET_REALLY_IWMMXT
&& VALID_IWMMXT_REG_MODE (mode
))
4022 /* For DImode assume values will usually live in core regs
4023 and only allow LDRD addressing modes. */
4024 if (!TARGET_LDRD
|| mode
!= DImode
)
4025 return (code
== CONST_INT
4026 && INTVAL (index
) < 1024
4027 && INTVAL (index
) > -1024
4028 && (INTVAL (index
) & 3) == 0);
4032 && (VALID_NEON_DREG_MODE (mode
) || VALID_NEON_QREG_MODE (mode
)))
4033 return (code
== CONST_INT
4034 && INTVAL (index
) < 1016
4035 && INTVAL (index
) > -1024
4036 && (INTVAL (index
) & 3) == 0);
4038 if (arm_address_register_rtx_p (index
, strict_p
)
4039 && (GET_MODE_SIZE (mode
) <= 4))
4042 if (mode
== DImode
|| mode
== DFmode
)
4044 HOST_WIDE_INT val
= INTVAL (index
);
4045 /* ??? Can we assume ldrd for thumb2? */
4046 /* Thumb-2 ldrd only has reg+const addressing modes. */
4047 if (code
!= CONST_INT
)
4050 /* ldrd supports offsets of +-1020.
4051 However the ldr fallback does not. */
4052 return val
> -256 && val
< 256 && (val
& 3) == 0;
4057 rtx xiop0
= XEXP (index
, 0);
4058 rtx xiop1
= XEXP (index
, 1);
4060 return ((arm_address_register_rtx_p (xiop0
, strict_p
)
4061 && thumb2_index_mul_operand (xiop1
))
4062 || (arm_address_register_rtx_p (xiop1
, strict_p
)
4063 && thumb2_index_mul_operand (xiop0
)));
4065 else if (code
== ASHIFT
)
4067 rtx op
= XEXP (index
, 1);
4069 return (arm_address_register_rtx_p (XEXP (index
, 0), strict_p
)
4070 && GET_CODE (op
) == CONST_INT
4072 && INTVAL (op
) <= 3);
4075 return (code
== CONST_INT
4076 && INTVAL (index
) < 4096
4077 && INTVAL (index
) > -256);
4080 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4082 thumb1_base_register_rtx_p (rtx x
, enum machine_mode mode
, int strict_p
)
4086 if (GET_CODE (x
) != REG
)
4092 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno
, mode
);
4094 return (regno
<= LAST_LO_REGNUM
4095 || regno
> LAST_VIRTUAL_REGISTER
4096 || regno
== FRAME_POINTER_REGNUM
4097 || (GET_MODE_SIZE (mode
) >= 4
4098 && (regno
== STACK_POINTER_REGNUM
4099 || regno
>= FIRST_PSEUDO_REGISTER
4100 || x
== hard_frame_pointer_rtx
4101 || x
== arg_pointer_rtx
)));
4104 /* Return nonzero if x is a legitimate index register. This is the case
4105 for any base register that can access a QImode object. */
4107 thumb1_index_register_rtx_p (rtx x
, int strict_p
)
4109 return thumb1_base_register_rtx_p (x
, QImode
, strict_p
);
4112 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4114 The AP may be eliminated to either the SP or the FP, so we use the
4115 least common denominator, e.g. SImode, and offsets from 0 to 64.
4117 ??? Verify whether the above is the right approach.
4119 ??? Also, the FP may be eliminated to the SP, so perhaps that
4120 needs special handling also.
4122 ??? Look at how the mips16 port solves this problem. It probably uses
4123 better ways to solve some of these problems.
4125 Although it is not incorrect, we don't accept QImode and HImode
4126 addresses based on the frame pointer or arg pointer until the
4127 reload pass starts. This is so that eliminating such addresses
4128 into stack based ones won't produce impossible code. */
4130 thumb1_legitimate_address_p (enum machine_mode mode
, rtx x
, int strict_p
)
4132 /* ??? Not clear if this is right. Experiment. */
4133 if (GET_MODE_SIZE (mode
) < 4
4134 && !(reload_in_progress
|| reload_completed
)
4135 && (reg_mentioned_p (frame_pointer_rtx
, x
)
4136 || reg_mentioned_p (arg_pointer_rtx
, x
)
4137 || reg_mentioned_p (virtual_incoming_args_rtx
, x
)
4138 || reg_mentioned_p (virtual_outgoing_args_rtx
, x
)
4139 || reg_mentioned_p (virtual_stack_dynamic_rtx
, x
)
4140 || reg_mentioned_p (virtual_stack_vars_rtx
, x
)))
4143 /* Accept any base register. SP only in SImode or larger. */
4144 else if (thumb1_base_register_rtx_p (x
, mode
, strict_p
))
4147 /* This is PC relative data before arm_reorg runs. */
4148 else if (GET_MODE_SIZE (mode
) >= 4 && CONSTANT_P (x
)
4149 && GET_CODE (x
) == SYMBOL_REF
4150 && CONSTANT_POOL_ADDRESS_P (x
) && !flag_pic
)
4153 /* This is PC relative data after arm_reorg runs. */
4154 else if (GET_MODE_SIZE (mode
) >= 4 && reload_completed
4155 && (GET_CODE (x
) == LABEL_REF
4156 || (GET_CODE (x
) == CONST
4157 && GET_CODE (XEXP (x
, 0)) == PLUS
4158 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == LABEL_REF
4159 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
)))
4162 /* Post-inc indexing only supported for SImode and larger. */
4163 else if (GET_CODE (x
) == POST_INC
&& GET_MODE_SIZE (mode
) >= 4
4164 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
))
4167 else if (GET_CODE (x
) == PLUS
)
4169 /* REG+REG address can be any two index registers. */
4170 /* We disallow FRAME+REG addressing since we know that FRAME
4171 will be replaced with STACK, and SP relative addressing only
4172 permits SP+OFFSET. */
4173 if (GET_MODE_SIZE (mode
) <= 4
4174 && XEXP (x
, 0) != frame_pointer_rtx
4175 && XEXP (x
, 1) != frame_pointer_rtx
4176 && thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
4177 && thumb1_index_register_rtx_p (XEXP (x
, 1), strict_p
))
4180 /* REG+const has 5-7 bit offset for non-SP registers. */
4181 else if ((thumb1_index_register_rtx_p (XEXP (x
, 0), strict_p
)
4182 || XEXP (x
, 0) == arg_pointer_rtx
)
4183 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4184 && thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
4187 /* REG+const has 10-bit offset for SP, but only SImode and
4188 larger is supported. */
4189 /* ??? Should probably check for DI/DFmode overflow here
4190 just like GO_IF_LEGITIMATE_OFFSET does. */
4191 else if (GET_CODE (XEXP (x
, 0)) == REG
4192 && REGNO (XEXP (x
, 0)) == STACK_POINTER_REGNUM
4193 && GET_MODE_SIZE (mode
) >= 4
4194 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4195 && INTVAL (XEXP (x
, 1)) >= 0
4196 && INTVAL (XEXP (x
, 1)) + GET_MODE_SIZE (mode
) <= 1024
4197 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
4200 else if (GET_CODE (XEXP (x
, 0)) == REG
4201 && (REGNO (XEXP (x
, 0)) == FRAME_POINTER_REGNUM
4202 || REGNO (XEXP (x
, 0)) == ARG_POINTER_REGNUM
4203 || (REGNO (XEXP (x
, 0)) >= FIRST_VIRTUAL_REGISTER
4204 && REGNO (XEXP (x
, 0)) <= LAST_VIRTUAL_REGISTER
))
4205 && GET_MODE_SIZE (mode
) >= 4
4206 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4207 && (INTVAL (XEXP (x
, 1)) & 3) == 0)
4211 else if (GET_MODE_CLASS (mode
) != MODE_FLOAT
4212 && GET_MODE_SIZE (mode
) == 4
4213 && GET_CODE (x
) == SYMBOL_REF
4214 && CONSTANT_POOL_ADDRESS_P (x
)
4216 && symbol_mentioned_p (get_pool_constant (x
))
4217 && ! pcrel_constant_p (get_pool_constant (x
))))
4223 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4224 instruction of mode MODE. */
4226 thumb_legitimate_offset_p (enum machine_mode mode
, HOST_WIDE_INT val
)
4228 switch (GET_MODE_SIZE (mode
))
4231 return val
>= 0 && val
< 32;
4234 return val
>= 0 && val
< 64 && (val
& 1) == 0;
4238 && (val
+ GET_MODE_SIZE (mode
)) <= 128
4243 /* Build the SYMBOL_REF for __tls_get_addr. */
4245 static GTY(()) rtx tls_get_addr_libfunc
;
4248 get_tls_get_addr (void)
4250 if (!tls_get_addr_libfunc
)
4251 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
4252 return tls_get_addr_libfunc
;
4256 arm_load_tp (rtx target
)
4259 target
= gen_reg_rtx (SImode
);
4263 /* Can return in any reg. */
4264 emit_insn (gen_load_tp_hard (target
));
4268 /* Always returned in r0. Immediately copy the result into a pseudo,
4269 otherwise other uses of r0 (e.g. setting up function arguments) may
4270 clobber the value. */
4274 emit_insn (gen_load_tp_soft ());
4276 tmp
= gen_rtx_REG (SImode
, 0);
4277 emit_move_insn (target
, tmp
);
4283 load_tls_operand (rtx x
, rtx reg
)
4287 if (reg
== NULL_RTX
)
4288 reg
= gen_reg_rtx (SImode
);
4290 tmp
= gen_rtx_CONST (SImode
, x
);
4292 emit_move_insn (reg
, tmp
);
4298 arm_call_tls_get_addr (rtx x
, rtx reg
, rtx
*valuep
, int reloc
)
4300 rtx insns
, label
, labelno
, sum
;
4304 labelno
= GEN_INT (pic_labelno
++);
4305 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
4306 label
= gen_rtx_CONST (VOIDmode
, label
);
4308 sum
= gen_rtx_UNSPEC (Pmode
,
4309 gen_rtvec (4, x
, GEN_INT (reloc
), label
,
4310 GEN_INT (TARGET_ARM
? 8 : 4)),
4312 reg
= load_tls_operand (sum
, reg
);
4315 emit_insn (gen_pic_add_dot_plus_eight (reg
, reg
, labelno
));
4316 else if (TARGET_THUMB2
)
4319 /* Thumb-2 only allows very limited access to the PC. Calculate
4320 the address in a temporary register. */
4321 tmp
= gen_reg_rtx (SImode
);
4322 emit_insn (gen_pic_load_dot_plus_four (tmp
, labelno
));
4323 emit_insn (gen_addsi3(reg
, reg
, tmp
));
4325 else /* TARGET_THUMB1 */
4326 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
4328 *valuep
= emit_library_call_value (get_tls_get_addr (), NULL_RTX
, LCT_PURE
, /* LCT_CONST? */
4329 Pmode
, 1, reg
, Pmode
);
4331 insns
= get_insns ();
4338 legitimize_tls_address (rtx x
, rtx reg
)
4340 rtx dest
, tp
, label
, labelno
, sum
, insns
, ret
, eqv
, addend
;
4341 unsigned int model
= SYMBOL_REF_TLS_MODEL (x
);
4345 case TLS_MODEL_GLOBAL_DYNAMIC
:
4346 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_GD32
);
4347 dest
= gen_reg_rtx (Pmode
);
4348 emit_libcall_block (insns
, dest
, ret
, x
);
4351 case TLS_MODEL_LOCAL_DYNAMIC
:
4352 insns
= arm_call_tls_get_addr (x
, reg
, &ret
, TLS_LDM32
);
4354 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4355 share the LDM result with other LD model accesses. */
4356 eqv
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, const1_rtx
),
4358 dest
= gen_reg_rtx (Pmode
);
4359 emit_libcall_block (insns
, dest
, ret
, eqv
);
4361 /* Load the addend. */
4362 addend
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (2, x
, GEN_INT (TLS_LDO32
)),
4364 addend
= force_reg (SImode
, gen_rtx_CONST (SImode
, addend
));
4365 return gen_rtx_PLUS (Pmode
, dest
, addend
);
4367 case TLS_MODEL_INITIAL_EXEC
:
4368 labelno
= GEN_INT (pic_labelno
++);
4369 label
= gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, labelno
), UNSPEC_PIC_LABEL
);
4370 label
= gen_rtx_CONST (VOIDmode
, label
);
4371 sum
= gen_rtx_UNSPEC (Pmode
,
4372 gen_rtvec (4, x
, GEN_INT (TLS_IE32
), label
,
4373 GEN_INT (TARGET_ARM
? 8 : 4)),
4375 reg
= load_tls_operand (sum
, reg
);
4378 emit_insn (gen_tls_load_dot_plus_eight (reg
, reg
, labelno
));
4379 else if (TARGET_THUMB2
)
4382 /* Thumb-2 only allows very limited access to the PC. Calculate
4383 the address in a temporary register. */
4384 tmp
= gen_reg_rtx (SImode
);
4385 emit_insn (gen_pic_load_dot_plus_four (tmp
, labelno
));
4386 emit_insn (gen_addsi3(reg
, reg
, tmp
));
4387 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
4391 emit_insn (gen_pic_add_dot_plus_four (reg
, reg
, labelno
));
4392 emit_move_insn (reg
, gen_const_mem (SImode
, reg
));
4395 tp
= arm_load_tp (NULL_RTX
);
4397 return gen_rtx_PLUS (Pmode
, tp
, reg
);
4399 case TLS_MODEL_LOCAL_EXEC
:
4400 tp
= arm_load_tp (NULL_RTX
);
4402 reg
= gen_rtx_UNSPEC (Pmode
,
4403 gen_rtvec (2, x
, GEN_INT (TLS_LE32
)),
4405 reg
= force_reg (SImode
, gen_rtx_CONST (SImode
, reg
));
4407 return gen_rtx_PLUS (Pmode
, tp
, reg
);
4414 /* Try machine-dependent ways of modifying an illegitimate address
4415 to be legitimate. If we find one, return the new, valid address. */
4417 arm_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
4419 if (arm_tls_symbol_p (x
))
4420 return legitimize_tls_address (x
, NULL_RTX
);
4422 if (GET_CODE (x
) == PLUS
)
4424 rtx xop0
= XEXP (x
, 0);
4425 rtx xop1
= XEXP (x
, 1);
4427 if (CONSTANT_P (xop0
) && !symbol_mentioned_p (xop0
))
4428 xop0
= force_reg (SImode
, xop0
);
4430 if (CONSTANT_P (xop1
) && !symbol_mentioned_p (xop1
))
4431 xop1
= force_reg (SImode
, xop1
);
4433 if (ARM_BASE_REGISTER_RTX_P (xop0
)
4434 && GET_CODE (xop1
) == CONST_INT
)
4436 HOST_WIDE_INT n
, low_n
;
4440 /* VFP addressing modes actually allow greater offsets, but for
4441 now we just stick with the lowest common denominator. */
4443 || ((TARGET_SOFT_FLOAT
|| TARGET_VFP
) && mode
== DFmode
))
4455 low_n
= ((mode
) == TImode
? 0
4456 : n
>= 0 ? (n
& 0xfff) : -((-n
) & 0xfff));
4460 base_reg
= gen_reg_rtx (SImode
);
4461 val
= force_operand (plus_constant (xop0
, n
), NULL_RTX
);
4462 emit_move_insn (base_reg
, val
);
4463 x
= plus_constant (base_reg
, low_n
);
4465 else if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
4466 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
4469 /* XXX We don't allow MINUS any more -- see comment in
4470 arm_legitimate_address_p (). */
4471 else if (GET_CODE (x
) == MINUS
)
4473 rtx xop0
= XEXP (x
, 0);
4474 rtx xop1
= XEXP (x
, 1);
4476 if (CONSTANT_P (xop0
))
4477 xop0
= force_reg (SImode
, xop0
);
4479 if (CONSTANT_P (xop1
) && ! symbol_mentioned_p (xop1
))
4480 xop1
= force_reg (SImode
, xop1
);
4482 if (xop0
!= XEXP (x
, 0) || xop1
!= XEXP (x
, 1))
4483 x
= gen_rtx_MINUS (SImode
, xop0
, xop1
);
4486 /* Make sure to take full advantage of the pre-indexed addressing mode
4487 with absolute addresses which often allows for the base register to
4488 be factorized for multiple adjacent memory references, and it might
4489 even allows for the mini pool to be avoided entirely. */
4490 else if (GET_CODE (x
) == CONST_INT
&& optimize
> 0)
4493 HOST_WIDE_INT mask
, base
, index
;
4496 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4497 use a 8-bit index. So let's use a 12-bit index for SImode only and
4498 hope that arm_gen_constant will enable ldrb to use more bits. */
4499 bits
= (mode
== SImode
) ? 12 : 8;
4500 mask
= (1 << bits
) - 1;
4501 base
= INTVAL (x
) & ~mask
;
4502 index
= INTVAL (x
) & mask
;
4503 if (bit_count (base
& 0xffffffff) > (32 - bits
)/2)
4505 /* It'll most probably be more efficient to generate the base
4506 with more bits set and use a negative index instead. */
4510 base_reg
= force_reg (SImode
, GEN_INT (base
));
4511 x
= plus_constant (base_reg
, index
);
4516 /* We need to find and carefully transform any SYMBOL and LABEL
4517 references; so go back to the original address expression. */
4518 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
4520 if (new_x
!= orig_x
)
4528 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4529 to be legitimate. If we find one, return the new, valid address. */
4531 thumb_legitimize_address (rtx x
, rtx orig_x
, enum machine_mode mode
)
4533 if (arm_tls_symbol_p (x
))
4534 return legitimize_tls_address (x
, NULL_RTX
);
4536 if (GET_CODE (x
) == PLUS
4537 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4538 && (INTVAL (XEXP (x
, 1)) >= 32 * GET_MODE_SIZE (mode
)
4539 || INTVAL (XEXP (x
, 1)) < 0))
4541 rtx xop0
= XEXP (x
, 0);
4542 rtx xop1
= XEXP (x
, 1);
4543 HOST_WIDE_INT offset
= INTVAL (xop1
);
4545 /* Try and fold the offset into a biasing of the base register and
4546 then offsetting that. Don't do this when optimizing for space
4547 since it can cause too many CSEs. */
4548 if (optimize_size
&& offset
>= 0
4549 && offset
< 256 + 31 * GET_MODE_SIZE (mode
))
4551 HOST_WIDE_INT delta
;
4554 delta
= offset
- (256 - GET_MODE_SIZE (mode
));
4555 else if (offset
< 32 * GET_MODE_SIZE (mode
) + 8)
4556 delta
= 31 * GET_MODE_SIZE (mode
);
4558 delta
= offset
& (~31 * GET_MODE_SIZE (mode
));
4560 xop0
= force_operand (plus_constant (xop0
, offset
- delta
),
4562 x
= plus_constant (xop0
, delta
);
4564 else if (offset
< 0 && offset
> -256)
4565 /* Small negative offsets are best done with a subtract before the
4566 dereference, forcing these into a register normally takes two
4568 x
= force_operand (x
, NULL_RTX
);
4571 /* For the remaining cases, force the constant into a register. */
4572 xop1
= force_reg (SImode
, xop1
);
4573 x
= gen_rtx_PLUS (SImode
, xop0
, xop1
);
4576 else if (GET_CODE (x
) == PLUS
4577 && s_register_operand (XEXP (x
, 1), SImode
)
4578 && !s_register_operand (XEXP (x
, 0), SImode
))
4580 rtx xop0
= force_operand (XEXP (x
, 0), NULL_RTX
);
4582 x
= gen_rtx_PLUS (SImode
, xop0
, XEXP (x
, 1));
4587 /* We need to find and carefully transform any SYMBOL and LABEL
4588 references; so go back to the original address expression. */
4589 rtx new_x
= legitimize_pic_address (orig_x
, mode
, NULL_RTX
);
4591 if (new_x
!= orig_x
)
4599 thumb_legitimize_reload_address (rtx
*x_p
,
4600 enum machine_mode mode
,
4601 int opnum
, int type
,
4602 int ind_levels ATTRIBUTE_UNUSED
)
4606 if (GET_CODE (x
) == PLUS
4607 && GET_MODE_SIZE (mode
) < 4
4608 && REG_P (XEXP (x
, 0))
4609 && XEXP (x
, 0) == stack_pointer_rtx
4610 && GET_CODE (XEXP (x
, 1)) == CONST_INT
4611 && !thumb_legitimate_offset_p (mode
, INTVAL (XEXP (x
, 1))))
4616 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
4617 Pmode
, VOIDmode
, 0, 0, opnum
, type
);
4621 /* If both registers are hi-regs, then it's better to reload the
4622 entire expression rather than each register individually. That
4623 only requires one reload register rather than two. */
4624 if (GET_CODE (x
) == PLUS
4625 && REG_P (XEXP (x
, 0))
4626 && REG_P (XEXP (x
, 1))
4627 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 0), mode
)
4628 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x
, 1), mode
))
4633 push_reload (orig_x
, NULL_RTX
, x_p
, NULL
, MODE_BASE_REG_CLASS (mode
),
4634 Pmode
, VOIDmode
, 0, 0, opnum
, type
);
4641 /* Test for various thread-local symbols. */
4643 /* Return TRUE if X is a thread-local symbol. */
4646 arm_tls_symbol_p (rtx x
)
4648 if (! TARGET_HAVE_TLS
)
4651 if (GET_CODE (x
) != SYMBOL_REF
)
4654 return SYMBOL_REF_TLS_MODEL (x
) != 0;
4657 /* Helper for arm_tls_referenced_p. */
4660 arm_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
4662 if (GET_CODE (*x
) == SYMBOL_REF
)
4663 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
4665 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4666 TLS offsets, not real symbol references. */
4667 if (GET_CODE (*x
) == UNSPEC
4668 && XINT (*x
, 1) == UNSPEC_TLS
)
4674 /* Return TRUE if X contains any TLS symbol references. */
4677 arm_tls_referenced_p (rtx x
)
4679 if (! TARGET_HAVE_TLS
)
4682 return for_each_rtx (&x
, arm_tls_operand_p_1
, NULL
);
4685 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4688 arm_cannot_force_const_mem (rtx x
)
4692 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
)
4694 split_const (x
, &base
, &offset
);
4695 if (GET_CODE (base
) == SYMBOL_REF
4696 && !offset_within_block_p (base
, INTVAL (offset
)))
4699 return arm_tls_referenced_p (x
);
4702 #define REG_OR_SUBREG_REG(X) \
4703 (GET_CODE (X) == REG \
4704 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4706 #define REG_OR_SUBREG_RTX(X) \
4707 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4709 #ifndef COSTS_N_INSNS
4710 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4713 thumb1_rtx_costs (rtx x
, enum rtx_code code
, enum rtx_code outer
)
4715 enum machine_mode mode
= GET_MODE (x
);
4728 return COSTS_N_INSNS (1);
4731 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
4734 unsigned HOST_WIDE_INT i
= INTVAL (XEXP (x
, 1));
4741 return COSTS_N_INSNS (2) + cycles
;
4743 return COSTS_N_INSNS (1) + 16;
4746 return (COSTS_N_INSNS (1)
4747 + 4 * ((GET_CODE (SET_SRC (x
)) == MEM
)
4748 + GET_CODE (SET_DEST (x
)) == MEM
));
4753 if ((unsigned HOST_WIDE_INT
) INTVAL (x
) < 256)
4755 if (thumb_shiftable_const (INTVAL (x
)))
4756 return COSTS_N_INSNS (2);
4757 return COSTS_N_INSNS (3);
4759 else if ((outer
== PLUS
|| outer
== COMPARE
)
4760 && INTVAL (x
) < 256 && INTVAL (x
) > -256)
4762 else if (outer
== AND
4763 && INTVAL (x
) < 256 && INTVAL (x
) >= -256)
4764 return COSTS_N_INSNS (1);
4765 else if (outer
== ASHIFT
|| outer
== ASHIFTRT
4766 || outer
== LSHIFTRT
)
4768 return COSTS_N_INSNS (2);
4774 return COSTS_N_INSNS (3);
4792 /* XXX another guess. */
4793 /* Memory costs quite a lot for the first word, but subsequent words
4794 load at the equivalent of a single insn each. */
4795 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
4796 + ((GET_CODE (x
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x
))
4801 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
4806 /* XXX still guessing. */
4807 switch (GET_MODE (XEXP (x
, 0)))
4810 return (1 + (mode
== DImode
? 4 : 0)
4811 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
4814 return (4 + (mode
== DImode
? 4 : 0)
4815 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
4818 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
4830 /* Worker routine for arm_rtx_costs. */
4831 /* ??? This needs updating for thumb2. */
4833 arm_rtx_costs_1 (rtx x
, enum rtx_code code
, enum rtx_code outer
)
4835 enum machine_mode mode
= GET_MODE (x
);
4836 enum rtx_code subcode
;
4842 /* Memory costs quite a lot for the first word, but subsequent words
4843 load at the equivalent of a single insn each. */
4844 return (10 + 4 * ((GET_MODE_SIZE (mode
) - 1) / UNITS_PER_WORD
)
4845 + (GET_CODE (x
) == SYMBOL_REF
4846 && CONSTANT_POOL_ADDRESS_P (x
) ? 4 : 0));
4852 return optimize_size
? COSTS_N_INSNS (2) : 100;
4855 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
4862 case ASHIFT
: case LSHIFTRT
: case ASHIFTRT
:
4864 return (8 + (GET_CODE (XEXP (x
, 1)) == CONST_INT
? 0 : 8)
4865 + ((GET_CODE (XEXP (x
, 0)) == REG
4866 || (GET_CODE (XEXP (x
, 0)) == SUBREG
4867 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == REG
))
4869 return (1 + ((GET_CODE (XEXP (x
, 0)) == REG
4870 || (GET_CODE (XEXP (x
, 0)) == SUBREG
4871 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == REG
))
4873 + ((GET_CODE (XEXP (x
, 1)) == REG
4874 || (GET_CODE (XEXP (x
, 1)) == SUBREG
4875 && GET_CODE (SUBREG_REG (XEXP (x
, 1))) == REG
)
4876 || (GET_CODE (XEXP (x
, 1)) == CONST_INT
))
4880 if (GET_CODE (XEXP (x
, 1)) == MULT
&& mode
== SImode
&& arm_arch_thumb2
)
4882 extra_cost
= rtx_cost (XEXP (x
, 1), code
);
4883 if (!REG_OR_SUBREG_REG (XEXP (x
, 0)))
4884 extra_cost
+= 4 * ARM_NUM_REGS (mode
);
4889 return (4 + (REG_OR_SUBREG_REG (XEXP (x
, 1)) ? 0 : 8)
4890 + ((REG_OR_SUBREG_REG (XEXP (x
, 0))
4891 || (GET_CODE (XEXP (x
, 0)) == CONST_INT
4892 && const_ok_for_arm (INTVAL (XEXP (x
, 0)))))
4895 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4896 return (2 + ((REG_OR_SUBREG_REG (XEXP (x
, 1))
4897 || (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
4898 && arm_const_double_rtx (XEXP (x
, 1))))
4900 + ((REG_OR_SUBREG_REG (XEXP (x
, 0))
4901 || (GET_CODE (XEXP (x
, 0)) == CONST_DOUBLE
4902 && arm_const_double_rtx (XEXP (x
, 0))))
4905 if (((GET_CODE (XEXP (x
, 0)) == CONST_INT
4906 && const_ok_for_arm (INTVAL (XEXP (x
, 0)))
4907 && REG_OR_SUBREG_REG (XEXP (x
, 1))))
4908 || (((subcode
= GET_CODE (XEXP (x
, 1))) == ASHIFT
4909 || subcode
== ASHIFTRT
|| subcode
== LSHIFTRT
4910 || subcode
== ROTATE
|| subcode
== ROTATERT
4912 && GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
4913 && ((INTVAL (XEXP (XEXP (x
, 1), 1)) &
4914 (INTVAL (XEXP (XEXP (x
, 1), 1)) - 1)) == 0)))
4915 && REG_OR_SUBREG_REG (XEXP (XEXP (x
, 1), 0))
4916 && (REG_OR_SUBREG_REG (XEXP (XEXP (x
, 1), 1))
4917 || GET_CODE (XEXP (XEXP (x
, 1), 1)) == CONST_INT
)
4918 && REG_OR_SUBREG_REG (XEXP (x
, 0))))
4923 if (arm_arch6
&& mode
== SImode
4924 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
4925 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
4926 return 1 + (GET_CODE (XEXP (XEXP (x
, 0), 0)) == MEM
? 10 : 0)
4927 + (GET_CODE (XEXP (x
, 1)) == MEM
? 10 : 0);
4929 if (GET_CODE (XEXP (x
, 0)) == MULT
)
4931 extra_cost
= rtx_cost (XEXP (x
, 0), code
);
4932 if (!REG_OR_SUBREG_REG (XEXP (x
, 1)))
4933 extra_cost
+= 4 * ARM_NUM_REGS (mode
);
4937 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
4938 return (2 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 8)
4939 + ((REG_OR_SUBREG_REG (XEXP (x
, 1))
4940 || (GET_CODE (XEXP (x
, 1)) == CONST_DOUBLE
4941 && arm_const_double_rtx (XEXP (x
, 1))))
4945 case AND
: case XOR
: case IOR
:
4948 /* Normally the frame registers will be spilt into reg+const during
4949 reload, so it is a bad idea to combine them with other instructions,
4950 since then they might not be moved outside of loops. As a compromise
4951 we allow integration with ops that have a constant as their second
4953 if ((REG_OR_SUBREG_REG (XEXP (x
, 0))
4954 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))
4955 && GET_CODE (XEXP (x
, 1)) != CONST_INT
)
4956 || (REG_OR_SUBREG_REG (XEXP (x
, 0))
4957 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x
, 0)))))
4961 return (4 + extra_cost
+ (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 8)
4962 + ((REG_OR_SUBREG_REG (XEXP (x
, 1))
4963 || (GET_CODE (XEXP (x
, 1)) == CONST_INT
4964 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
)))
4967 if (REG_OR_SUBREG_REG (XEXP (x
, 0)))
4968 return (1 + (GET_CODE (XEXP (x
, 1)) == CONST_INT
? 0 : extra_cost
)
4969 + ((REG_OR_SUBREG_REG (XEXP (x
, 1))
4970 || (GET_CODE (XEXP (x
, 1)) == CONST_INT
4971 && const_ok_for_op (INTVAL (XEXP (x
, 1)), code
)))
4974 else if (REG_OR_SUBREG_REG (XEXP (x
, 1)))
4975 return (1 + extra_cost
4976 + ((((subcode
= GET_CODE (XEXP (x
, 0))) == ASHIFT
4977 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
4978 || subcode
== ROTATE
|| subcode
== ROTATERT
4980 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4981 && ((INTVAL (XEXP (XEXP (x
, 0), 1)) &
4982 (INTVAL (XEXP (XEXP (x
, 0), 1)) - 1)) == 0)))
4983 && (REG_OR_SUBREG_REG (XEXP (XEXP (x
, 0), 0)))
4984 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x
, 0), 1)))
4985 || GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
))
4991 /* This should have been handled by the CPU specific routines. */
4995 if (arm_arch3m
&& mode
== SImode
4996 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
4997 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
4998 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0))
4999 == GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)))
5000 && (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
5001 || GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
))
5006 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5007 return 4 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 6);
5011 return 4 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 4);
5013 return 1 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 4);
5016 if (GET_CODE (XEXP (x
, 1)) == PC
|| GET_CODE (XEXP (x
, 2)) == PC
)
5024 return 4 + (mode
== DImode
? 4 : 0);
5027 if (arm_arch_thumb2
&& mode
== SImode
)
5028 return 1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0);
5030 if (GET_MODE (XEXP (x
, 0)) == QImode
)
5031 return (4 + (mode
== DImode
? 4 : 0)
5032 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
5035 if (arm_arch6
&& mode
== SImode
)
5036 return 1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0);
5038 switch (GET_MODE (XEXP (x
, 0)))
5041 return (1 + (mode
== DImode
? 4 : 0)
5042 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
5045 return (4 + (mode
== DImode
? 4 : 0)
5046 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
5049 return (1 + (GET_CODE (XEXP (x
, 0)) == MEM
? 10 : 0));
5064 if (const_ok_for_arm (INTVAL (x
)))
5065 return outer
== SET
? 2 : -1;
5066 else if (outer
== AND
5067 && const_ok_for_arm (~INTVAL (x
)))
5069 else if ((outer
== COMPARE
5070 || outer
== PLUS
|| outer
== MINUS
)
5071 && const_ok_for_arm (-INTVAL (x
)))
5082 if (arm_const_double_rtx (x
) || vfp3_const_double_rtx (x
))
5083 return outer
== SET
? 2 : -1;
5084 else if ((outer
== COMPARE
|| outer
== PLUS
)
5085 && neg_const_double_rtx_ok_for_fpa (x
))
5094 /* RTX costs when optimizing for size. */
5096 arm_size_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
5098 enum machine_mode mode
= GET_MODE (x
);
5102 /* XXX TBD. For now, use the standard costs. */
5103 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
5107 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5111 /* A memory access costs 1 insn if the mode is small, or the address is
5112 a single register, otherwise it costs one insn per word. */
5113 if (REG_P (XEXP (x
, 0)))
5114 *total
= COSTS_N_INSNS (1);
5116 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
5123 /* Needs a libcall, so it costs about this. */
5124 *total
= COSTS_N_INSNS (2);
5128 if (mode
== SImode
&& GET_CODE (XEXP (x
, 1)) == REG
)
5130 *total
= COSTS_N_INSNS (2) + rtx_cost (XEXP (x
, 0), code
);
5138 if (mode
== DImode
&& GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5140 *total
= COSTS_N_INSNS (3) + rtx_cost (XEXP (x
, 0), code
);
5143 else if (mode
== SImode
)
5145 *total
= COSTS_N_INSNS (1) + rtx_cost (XEXP (x
, 0), code
);
5146 /* Slightly disparage register shifts, but not by much. */
5147 if (GET_CODE (XEXP (x
, 1)) != CONST_INT
)
5148 *total
+= 1 + rtx_cost (XEXP (x
, 1), code
);
5152 /* Needs a libcall. */
5153 *total
= COSTS_N_INSNS (2);
5157 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5159 *total
= COSTS_N_INSNS (1);
5165 enum rtx_code subcode0
= GET_CODE (XEXP (x
, 0));
5166 enum rtx_code subcode1
= GET_CODE (XEXP (x
, 1));
5168 if (subcode0
== ROTATE
|| subcode0
== ROTATERT
|| subcode0
== ASHIFT
5169 || subcode0
== LSHIFTRT
|| subcode0
== ASHIFTRT
5170 || subcode1
== ROTATE
|| subcode1
== ROTATERT
5171 || subcode1
== ASHIFT
|| subcode1
== LSHIFTRT
5172 || subcode1
== ASHIFTRT
)
5174 /* It's just the cost of the two operands. */
5179 *total
= COSTS_N_INSNS (1);
5183 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
5187 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5189 *total
= COSTS_N_INSNS (1);
5194 case AND
: case XOR
: case IOR
:
5197 enum rtx_code subcode
= GET_CODE (XEXP (x
, 0));
5199 if (subcode
== ROTATE
|| subcode
== ROTATERT
|| subcode
== ASHIFT
5200 || subcode
== LSHIFTRT
|| subcode
== ASHIFTRT
5201 || (code
== AND
&& subcode
== NOT
))
5203 /* It's just the cost of the two operands. */
5209 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
5213 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
5217 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5218 *total
= COSTS_N_INSNS (1);
5221 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
5230 if (cc_register (XEXP (x
, 0), VOIDmode
))
5233 *total
= COSTS_N_INSNS (1);
5237 if (TARGET_HARD_FLOAT
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5238 *total
= COSTS_N_INSNS (1);
5240 *total
= COSTS_N_INSNS (1 + ARM_NUM_REGS (mode
));
5245 if (GET_MODE_SIZE (GET_MODE (XEXP (x
, 0))) < 4)
5247 if (!(arm_arch4
&& MEM_P (XEXP (x
, 0))))
5248 *total
+= COSTS_N_INSNS (arm_arch6
? 1 : 2);
5251 *total
+= COSTS_N_INSNS (1);
5256 if (!(arm_arch4
&& MEM_P (XEXP (x
, 0))))
5258 switch (GET_MODE (XEXP (x
, 0)))
5261 *total
+= COSTS_N_INSNS (1);
5265 *total
+= COSTS_N_INSNS (arm_arch6
? 1 : 2);
5271 *total
+= COSTS_N_INSNS (2);
5276 *total
+= COSTS_N_INSNS (1);
5281 if (const_ok_for_arm (INTVAL (x
)))
5282 *total
= COSTS_N_INSNS (outer_code
== SET
? 1 : 0);
5283 else if (const_ok_for_arm (~INTVAL (x
)))
5284 *total
= COSTS_N_INSNS (outer_code
== AND
? 0 : 1);
5285 else if (const_ok_for_arm (-INTVAL (x
)))
5287 if (outer_code
== COMPARE
|| outer_code
== PLUS
5288 || outer_code
== MINUS
)
5291 *total
= COSTS_N_INSNS (1);
5294 *total
= COSTS_N_INSNS (2);
5300 *total
= COSTS_N_INSNS (2);
5304 *total
= COSTS_N_INSNS (4);
5308 if (mode
!= VOIDmode
)
5309 *total
= COSTS_N_INSNS (ARM_NUM_REGS (mode
));
5311 *total
= COSTS_N_INSNS (4); /* How knows? */
5316 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5317 supported on any "slowmul" cores, so it can be ignored. */
5320 arm_slowmul_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
5322 enum machine_mode mode
= GET_MODE (x
);
5326 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
5333 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5340 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5342 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
5343 & (unsigned HOST_WIDE_INT
) 0xffffffff);
5344 int cost
, const_ok
= const_ok_for_arm (i
);
5345 int j
, booth_unit_size
;
5347 /* Tune as appropriate. */
5348 cost
= const_ok
? 4 : 8;
5349 booth_unit_size
= 2;
5350 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
5352 i
>>= booth_unit_size
;
5360 *total
= 30 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 4)
5361 + (REG_OR_SUBREG_REG (XEXP (x
, 1)) ? 0 : 4);
5365 *total
= arm_rtx_costs_1 (x
, code
, outer_code
);
5371 /* RTX cost for cores with a fast multiply unit (M variants). */
5374 arm_fastmul_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
5376 enum machine_mode mode
= GET_MODE (x
);
5380 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
5384 /* ??? should thumb2 use different costs? */
5388 /* There is no point basing this on the tuning, since it is always the
5389 fast variant if it exists at all. */
5391 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
5392 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
5393 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
5400 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5407 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5409 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
5410 & (unsigned HOST_WIDE_INT
) 0xffffffff);
5411 int cost
, const_ok
= const_ok_for_arm (i
);
5412 int j
, booth_unit_size
;
5414 /* Tune as appropriate. */
5415 cost
= const_ok
? 4 : 8;
5416 booth_unit_size
= 8;
5417 for (j
= 0; i
&& j
< 32; j
+= booth_unit_size
)
5419 i
>>= booth_unit_size
;
5427 *total
= 8 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 4)
5428 + (REG_OR_SUBREG_REG (XEXP (x
, 1)) ? 0 : 4);
5432 *total
= arm_rtx_costs_1 (x
, code
, outer_code
);
5438 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5439 so it can be ignored. */
5442 arm_xscale_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
5444 enum machine_mode mode
= GET_MODE (x
);
5448 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
5455 /* There is no point basing this on the tuning, since it is always the
5456 fast variant if it exists at all. */
5458 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
5459 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
5460 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
5467 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
5474 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5476 unsigned HOST_WIDE_INT i
= (INTVAL (XEXP (x
, 1))
5477 & (unsigned HOST_WIDE_INT
) 0xffffffff);
5478 int cost
, const_ok
= const_ok_for_arm (i
);
5479 unsigned HOST_WIDE_INT masked_const
;
5481 /* The cost will be related to two insns.
5482 First a load of the constant (MOV or LDR), then a multiply. */
5485 cost
+= 1; /* LDR is probably more expensive because
5486 of longer result latency. */
5487 masked_const
= i
& 0xffff8000;
5488 if (masked_const
!= 0 && masked_const
!= 0xffff8000)
5490 masked_const
= i
& 0xf8000000;
5491 if (masked_const
== 0 || masked_const
== 0xf8000000)
5500 *total
= 8 + (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : 4)
5501 + (REG_OR_SUBREG_REG (XEXP (x
, 1)) ? 0 : 4);
5505 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5506 will stall until the multiplication is complete. */
5507 if (GET_CODE (XEXP (x
, 0)) == MULT
)
5508 *total
= 4 + rtx_cost (XEXP (x
, 0), code
);
5510 *total
= arm_rtx_costs_1 (x
, code
, outer_code
);
5514 *total
= arm_rtx_costs_1 (x
, code
, outer_code
);
5520 /* RTX costs for 9e (and later) cores. */
5523 arm_9e_rtx_costs (rtx x
, int code
, int outer_code
, int *total
)
5525 enum machine_mode mode
= GET_MODE (x
);
5534 *total
= COSTS_N_INSNS (3);
5538 *total
= thumb1_rtx_costs (x
, code
, outer_code
);
5546 /* There is no point basing this on the tuning, since it is always the
5547 fast variant if it exists at all. */
5549 && (GET_CODE (XEXP (x
, 0)) == GET_CODE (XEXP (x
, 1)))
5550 && (GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
5551 || GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
))
5558 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5575 *total
= cost
+ (REG_OR_SUBREG_REG (XEXP (x
, 0)) ? 0 : nonreg_cost
)
5576 + (REG_OR_SUBREG_REG (XEXP (x
, 1)) ? 0 : nonreg_cost
);
5580 *total
= arm_rtx_costs_1 (x
, code
, outer_code
);
5584 /* All address computations that can be done are free, but rtx cost returns
5585 the same for practically all of them. So we weight the different types
5586 of address here in the order (most pref first):
5587 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5589 arm_arm_address_cost (rtx x
)
5591 enum rtx_code c
= GET_CODE (x
);
5593 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== POST_INC
|| c
== POST_DEC
)
5595 if (c
== MEM
|| c
== LABEL_REF
|| c
== SYMBOL_REF
)
5598 if (c
== PLUS
|| c
== MINUS
)
5600 if (GET_CODE (XEXP (x
, 0)) == CONST_INT
)
5603 if (ARITHMETIC_P (XEXP (x
, 0)) || ARITHMETIC_P (XEXP (x
, 1)))
5613 arm_thumb_address_cost (rtx x
)
5615 enum rtx_code c
= GET_CODE (x
);
5620 && GET_CODE (XEXP (x
, 0)) == REG
5621 && GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5628 arm_address_cost (rtx x
)
5630 return TARGET_32BIT
? arm_arm_address_cost (x
) : arm_thumb_address_cost (x
);
5634 arm_adjust_cost (rtx insn
, rtx link
, rtx dep
, int cost
)
5638 /* Some true dependencies can have a higher cost depending
5639 on precisely how certain input operands are used. */
5641 && REG_NOTE_KIND (link
) == 0
5642 && recog_memoized (insn
) >= 0
5643 && recog_memoized (dep
) >= 0)
5645 int shift_opnum
= get_attr_shift (insn
);
5646 enum attr_type attr_type
= get_attr_type (dep
);
5648 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5649 operand for INSN. If we have a shifted input operand and the
5650 instruction we depend on is another ALU instruction, then we may
5651 have to account for an additional stall. */
5652 if (shift_opnum
!= 0
5653 && (attr_type
== TYPE_ALU_SHIFT
|| attr_type
== TYPE_ALU_SHIFT_REG
))
5655 rtx shifted_operand
;
5658 /* Get the shifted operand. */
5659 extract_insn (insn
);
5660 shifted_operand
= recog_data
.operand
[shift_opnum
];
5662 /* Iterate over all the operands in DEP. If we write an operand
5663 that overlaps with SHIFTED_OPERAND, then we have increase the
5664 cost of this dependency. */
5666 preprocess_constraints ();
5667 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
5669 /* We can ignore strict inputs. */
5670 if (recog_data
.operand_type
[opno
] == OP_IN
)
5673 if (reg_overlap_mentioned_p (recog_data
.operand
[opno
],
5680 /* XXX This is not strictly true for the FPA. */
5681 if (REG_NOTE_KIND (link
) == REG_DEP_ANTI
5682 || REG_NOTE_KIND (link
) == REG_DEP_OUTPUT
)
5685 /* Call insns don't incur a stall, even if they follow a load. */
5686 if (REG_NOTE_KIND (link
) == 0
5687 && GET_CODE (insn
) == CALL_INSN
)
5690 if ((i_pat
= single_set (insn
)) != NULL
5691 && GET_CODE (SET_SRC (i_pat
)) == MEM
5692 && (d_pat
= single_set (dep
)) != NULL
5693 && GET_CODE (SET_DEST (d_pat
)) == MEM
)
5695 rtx src_mem
= XEXP (SET_SRC (i_pat
), 0);
5696 /* This is a load after a store, there is no conflict if the load reads
5697 from a cached area. Assume that loads from the stack, and from the
5698 constant pool are cached, and that others will miss. This is a
5701 if ((GET_CODE (src_mem
) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (src_mem
))
5702 || reg_mentioned_p (stack_pointer_rtx
, src_mem
)
5703 || reg_mentioned_p (frame_pointer_rtx
, src_mem
)
5704 || reg_mentioned_p (hard_frame_pointer_rtx
, src_mem
))
5711 static int fp_consts_inited
= 0;
5713 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5714 static const char * const strings_fp
[8] =
5717 "4", "5", "0.5", "10"
5720 static REAL_VALUE_TYPE values_fp
[8];
5723 init_fp_table (void)
5729 fp_consts_inited
= 1;
5731 fp_consts_inited
= 8;
5733 for (i
= 0; i
< fp_consts_inited
; i
++)
5735 r
= REAL_VALUE_ATOF (strings_fp
[i
], DFmode
);
5740 /* Return TRUE if rtx X is a valid immediate FP constant. */
5742 arm_const_double_rtx (rtx x
)
5747 if (!fp_consts_inited
)
5750 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5751 if (REAL_VALUE_MINUS_ZERO (r
))
5754 for (i
= 0; i
< fp_consts_inited
; i
++)
5755 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
5761 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5763 neg_const_double_rtx_ok_for_fpa (rtx x
)
5768 if (!fp_consts_inited
)
5771 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5772 r
= REAL_VALUE_NEGATE (r
);
5773 if (REAL_VALUE_MINUS_ZERO (r
))
5776 for (i
= 0; i
< 8; i
++)
5777 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
5784 /* VFPv3 has a fairly wide range of representable immediates, formed from
5785 "quarter-precision" floating-point values. These can be evaluated using this
5786 formula (with ^ for exponentiation):
5790 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5791 16 <= n <= 31 and 0 <= r <= 7.
5793 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5795 - A (most-significant) is the sign bit.
5796 - BCD are the exponent (encoded as r XOR 3).
5797 - EFGH are the mantissa (encoded as n - 16).
5800 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5801 fconst[sd] instruction, or -1 if X isn't suitable. */
5803 vfp3_const_double_index (rtx x
)
5805 REAL_VALUE_TYPE r
, m
;
5807 unsigned HOST_WIDE_INT mantissa
, mant_hi
;
5808 unsigned HOST_WIDE_INT mask
;
5809 HOST_WIDE_INT m1
, m2
;
5810 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
5812 if (!TARGET_VFP3
|| GET_CODE (x
) != CONST_DOUBLE
)
5815 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
5817 /* We can't represent these things, so detect them first. */
5818 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
) || REAL_VALUE_MINUS_ZERO (r
))
5821 /* Extract sign, exponent and mantissa. */
5822 sign
= REAL_VALUE_NEGATIVE (r
) ? 1 : 0;
5823 r
= REAL_VALUE_ABS (r
);
5824 exponent
= REAL_EXP (&r
);
5825 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5826 highest (sign) bit, with a fixed binary point at bit point_pos.
5827 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5828 bits for the mantissa, this may fail (low bits would be lost). */
5829 real_ldexp (&m
, &r
, point_pos
- exponent
);
5830 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
5834 /* If there are bits set in the low part of the mantissa, we can't
5835 represent this value. */
5839 /* Now make it so that mantissa contains the most-significant bits, and move
5840 the point_pos to indicate that the least-significant bits have been
5842 point_pos
-= HOST_BITS_PER_WIDE_INT
;
5845 /* We can permit four significant bits of mantissa only, plus a high bit
5846 which is always 1. */
5847 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
5848 if ((mantissa
& mask
) != 0)
5851 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5852 mantissa
>>= point_pos
- 5;
5854 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5855 floating-point immediate zero with Neon using an integer-zero load, but
5856 that case is handled elsewhere.) */
5860 gcc_assert (mantissa
>= 16 && mantissa
<= 31);
5862 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5863 normalized significands are in the range [1, 2). (Our mantissa is shifted
5864 left 4 places at this point relative to normalized IEEE754 values). GCC
5865 internally uses [0.5, 1) (see real.c), so the exponent returned from
5866 REAL_EXP must be altered. */
5867 exponent
= 5 - exponent
;
5869 if (exponent
< 0 || exponent
> 7)
5872 /* Sign, mantissa and exponent are now in the correct form to plug into the
5873 formula described in the comment above. */
5874 return (sign
<< 7) | ((exponent
^ 3) << 4) | (mantissa
- 16);
5877 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5879 vfp3_const_double_rtx (rtx x
)
5884 return vfp3_const_double_index (x
) != -1;
5887 /* Recognize immediates which can be used in various Neon instructions. Legal
5888 immediates are described by the following table (for VMVN variants, the
5889 bitwise inverse of the constant shown is recognized. In either case, VMOV
5890 is output and the correct instruction to use for a given constant is chosen
5891 by the assembler). The constant shown is replicated across all elements of
5892 the destination vector.
5894 insn elems variant constant (binary)
5895 ---- ----- ------- -----------------
5896 vmov i32 0 00000000 00000000 00000000 abcdefgh
5897 vmov i32 1 00000000 00000000 abcdefgh 00000000
5898 vmov i32 2 00000000 abcdefgh 00000000 00000000
5899 vmov i32 3 abcdefgh 00000000 00000000 00000000
5900 vmov i16 4 00000000 abcdefgh
5901 vmov i16 5 abcdefgh 00000000
5902 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5903 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5904 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5905 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5906 vmvn i16 10 00000000 abcdefgh
5907 vmvn i16 11 abcdefgh 00000000
5908 vmov i32 12 00000000 00000000 abcdefgh 11111111
5909 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5910 vmov i32 14 00000000 abcdefgh 11111111 11111111
5911 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5913 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5914 eeeeeeee ffffffff gggggggg hhhhhhhh
5915 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5917 For case 18, B = !b. Representable values are exactly those accepted by
5918 vfp3_const_double_index, but are output as floating-point numbers rather
5921 Variants 0-5 (inclusive) may also be used as immediates for the second
5922 operand of VORR/VBIC instructions.
5924 The INVERSE argument causes the bitwise inverse of the given operand to be
5925 recognized instead (used for recognizing legal immediates for the VAND/VORN
5926 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5927 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5928 output, rather than the real insns vbic/vorr).
5930 INVERSE makes no difference to the recognition of float vectors.
5932 The return value is the variant of immediate as shown in the above table, or
5933 -1 if the given value doesn't match any of the listed patterns.
5936 neon_valid_immediate (rtx op
, enum machine_mode mode
, int inverse
,
5937 rtx
*modconst
, int *elementwidth
)
5939 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5941 for (i = 0; i < idx; i += (STRIDE)) \
5946 immtype = (CLASS); \
5947 elsize = (ELSIZE); \
5951 unsigned int i
, elsize
, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
5952 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
5953 unsigned char bytes
[16];
5954 int immtype
= -1, matches
;
5955 unsigned int invmask
= inverse
? 0xff : 0;
5957 /* Vectors of float constants. */
5958 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
5960 rtx el0
= CONST_VECTOR_ELT (op
, 0);
5963 if (!vfp3_const_double_rtx (el0
))
5966 REAL_VALUE_FROM_CONST_DOUBLE (r0
, el0
);
5968 for (i
= 1; i
< n_elts
; i
++)
5970 rtx elt
= CONST_VECTOR_ELT (op
, i
);
5973 REAL_VALUE_FROM_CONST_DOUBLE (re
, elt
);
5975 if (!REAL_VALUES_EQUAL (r0
, re
))
5980 *modconst
= CONST_VECTOR_ELT (op
, 0);
5988 /* Splat vector constant out into a byte vector. */
5989 for (i
= 0; i
< n_elts
; i
++)
5991 rtx el
= CONST_VECTOR_ELT (op
, i
);
5992 unsigned HOST_WIDE_INT elpart
;
5993 unsigned int part
, parts
;
5995 if (GET_CODE (el
) == CONST_INT
)
5997 elpart
= INTVAL (el
);
6000 else if (GET_CODE (el
) == CONST_DOUBLE
)
6002 elpart
= CONST_DOUBLE_LOW (el
);
6008 for (part
= 0; part
< parts
; part
++)
6011 for (byte
= 0; byte
< innersize
; byte
++)
6013 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
6014 elpart
>>= BITS_PER_UNIT
;
6016 if (GET_CODE (el
) == CONST_DOUBLE
)
6017 elpart
= CONST_DOUBLE_HIGH (el
);
6022 gcc_assert (idx
== GET_MODE_SIZE (mode
));
6026 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
6027 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
6029 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6030 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
6032 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6033 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
6035 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6036 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3]);
6038 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0);
6040 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]);
6042 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
6043 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
6045 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6046 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
6048 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6049 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
6051 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6052 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3]);
6054 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff);
6056 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]);
6058 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6059 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0);
6061 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6062 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff);
6064 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6065 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0);
6067 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6068 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff);
6070 CHECK (1, 8, 16, bytes
[i
] == bytes
[0]);
6072 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
6073 && bytes
[i
] == bytes
[(i
+ 8) % idx
]);
6081 *elementwidth
= elsize
;
6085 unsigned HOST_WIDE_INT imm
= 0;
6087 /* Un-invert bytes of recognized vector, if necessary. */
6089 for (i
= 0; i
< idx
; i
++)
6090 bytes
[i
] ^= invmask
;
6094 /* FIXME: Broken on 32-bit H_W_I hosts. */
6095 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
6097 for (i
= 0; i
< 8; i
++)
6098 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
6099 << (i
* BITS_PER_UNIT
);
6101 *modconst
= GEN_INT (imm
);
6105 unsigned HOST_WIDE_INT imm
= 0;
6107 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
6108 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
6110 *modconst
= GEN_INT (imm
);
6118 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6119 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6120 float elements), and a modified constant (whatever should be output for a
6121 VMOV) in *MODCONST. */
6124 neon_immediate_valid_for_move (rtx op
, enum machine_mode mode
,
6125 rtx
*modconst
, int *elementwidth
)
6129 int retval
= neon_valid_immediate (op
, mode
, 0, &tmpconst
, &tmpwidth
);
6135 *modconst
= tmpconst
;
6138 *elementwidth
= tmpwidth
;
6143 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6144 the immediate is valid, write a constant suitable for using as an operand
6145 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6146 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6149 neon_immediate_valid_for_logic (rtx op
, enum machine_mode mode
, int inverse
,
6150 rtx
*modconst
, int *elementwidth
)
6154 int retval
= neon_valid_immediate (op
, mode
, inverse
, &tmpconst
, &tmpwidth
);
6156 if (retval
< 0 || retval
> 5)
6160 *modconst
= tmpconst
;
6163 *elementwidth
= tmpwidth
;
6168 /* Return a string suitable for output of Neon immediate logic operation
6172 neon_output_logic_immediate (const char *mnem
, rtx
*op2
, enum machine_mode mode
,
6173 int inverse
, int quad
)
6175 int width
, is_valid
;
6176 static char templ
[40];
6178 is_valid
= neon_immediate_valid_for_logic (*op2
, mode
, inverse
, op2
, &width
);
6180 gcc_assert (is_valid
!= 0);
6183 sprintf (templ
, "%s.i%d\t%%q0, %%2", mnem
, width
);
6185 sprintf (templ
, "%s.i%d\t%%P0, %%2", mnem
, width
);
6190 /* Output a sequence of pairwise operations to implement a reduction.
6191 NOTE: We do "too much work" here, because pairwise operations work on two
6192 registers-worth of operands in one go. Unfortunately we can't exploit those
6193 extra calculations to do the full operation in fewer steps, I don't think.
6194 Although all vector elements of the result but the first are ignored, we
6195 actually calculate the same result in each of the elements. An alternative
6196 such as initially loading a vector with zero to use as each of the second
6197 operands would use up an additional register and take an extra instruction,
6198 for no particular gain. */
6201 neon_pairwise_reduce (rtx op0
, rtx op1
, enum machine_mode mode
,
6202 rtx (*reduc
) (rtx
, rtx
, rtx
))
6204 enum machine_mode inner
= GET_MODE_INNER (mode
);
6205 unsigned int i
, parts
= GET_MODE_SIZE (mode
) / GET_MODE_SIZE (inner
);
6208 for (i
= parts
/ 2; i
>= 1; i
/= 2)
6210 rtx dest
= (i
== 1) ? op0
: gen_reg_rtx (mode
);
6211 emit_insn (reduc (dest
, tmpsum
, tmpsum
));
6216 /* Initialize a vector with non-constant elements. FIXME: We can do better
6217 than the current implementation (building a vector on the stack and then
6218 loading it) in many cases. See rs6000.c. */
6221 neon_expand_vector_init (rtx target
, rtx vals
)
6223 enum machine_mode mode
= GET_MODE (target
);
6224 enum machine_mode inner
= GET_MODE_INNER (mode
);
6225 unsigned int i
, n_elts
= GET_MODE_NUNITS (mode
);
6228 gcc_assert (VECTOR_MODE_P (mode
));
6230 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
), 0);
6231 for (i
= 0; i
< n_elts
; i
++)
6232 emit_move_insn (adjust_address_nv (mem
, inner
, i
* GET_MODE_SIZE (inner
)),
6233 XVECEXP (vals
, 0, i
));
6235 emit_move_insn (target
, mem
);
6238 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6239 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6240 reported source locations are bogus. */
6243 bounds_check (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
6248 gcc_assert (GET_CODE (operand
) == CONST_INT
);
6250 lane
= INTVAL (operand
);
6252 if (lane
< low
|| lane
>= high
)
6256 /* Bounds-check lanes. */
6259 neon_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
6261 bounds_check (operand
, low
, high
, "lane out of range");
6264 /* Bounds-check constants. */
6267 neon_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
6269 bounds_check (operand
, low
, high
, "constant out of range");
6273 neon_element_bits (enum machine_mode mode
)
6276 return GET_MODE_BITSIZE (mode
);
6278 return GET_MODE_BITSIZE (GET_MODE_INNER (mode
));
6282 /* Predicates for `match_operand' and `match_operator'. */
6284 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6286 cirrus_memory_offset (rtx op
)
6288 /* Reject eliminable registers. */
6289 if (! (reload_in_progress
|| reload_completed
)
6290 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
6291 || reg_mentioned_p (arg_pointer_rtx
, op
)
6292 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
6293 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
6294 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
6295 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
6298 if (GET_CODE (op
) == MEM
)
6304 /* Match: (mem (reg)). */
6305 if (GET_CODE (ind
) == REG
)
6311 if (GET_CODE (ind
) == PLUS
6312 && GET_CODE (XEXP (ind
, 0)) == REG
6313 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
6314 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
)
6321 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6322 WB is true if full writeback address modes are allowed and is false
6323 if limited writeback address modes (POST_INC and PRE_DEC) are
6327 arm_coproc_mem_operand (rtx op
, bool wb
)
6331 /* Reject eliminable registers. */
6332 if (! (reload_in_progress
|| reload_completed
)
6333 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
6334 || reg_mentioned_p (arg_pointer_rtx
, op
)
6335 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
6336 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
6337 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
6338 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
6341 /* Constants are converted into offsets from labels. */
6342 if (GET_CODE (op
) != MEM
)
6347 if (reload_completed
6348 && (GET_CODE (ind
) == LABEL_REF
6349 || (GET_CODE (ind
) == CONST
6350 && GET_CODE (XEXP (ind
, 0)) == PLUS
6351 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
6352 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
6355 /* Match: (mem (reg)). */
6356 if (GET_CODE (ind
) == REG
)
6357 return arm_address_register_rtx_p (ind
, 0);
6359 /* Autoincremment addressing modes. POST_INC and PRE_DEC are
6360 acceptable in any case (subject to verification by
6361 arm_address_register_rtx_p). We need WB to be true to accept
6362 PRE_INC and POST_DEC. */
6363 if (GET_CODE (ind
) == POST_INC
6364 || GET_CODE (ind
) == PRE_DEC
6366 && (GET_CODE (ind
) == PRE_INC
6367 || GET_CODE (ind
) == POST_DEC
)))
6368 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
6371 && (GET_CODE (ind
) == POST_MODIFY
|| GET_CODE (ind
) == PRE_MODIFY
)
6372 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
6373 && GET_CODE (XEXP (ind
, 1)) == PLUS
6374 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
6375 ind
= XEXP (ind
, 1);
6380 if (GET_CODE (ind
) == PLUS
6381 && GET_CODE (XEXP (ind
, 0)) == REG
6382 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
6383 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
6384 && INTVAL (XEXP (ind
, 1)) > -1024
6385 && INTVAL (XEXP (ind
, 1)) < 1024
6386 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
6392 /* Return TRUE if OP is a memory operand which we can load or store a vector
6393 to/from. If CORE is true, we're moving from ARM registers not Neon
6396 neon_vector_mem_operand (rtx op
, bool core
)
6400 /* Reject eliminable registers. */
6401 if (! (reload_in_progress
|| reload_completed
)
6402 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
6403 || reg_mentioned_p (arg_pointer_rtx
, op
)
6404 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
6405 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
6406 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
6407 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
6410 /* Constants are converted into offsets from labels. */
6411 if (GET_CODE (op
) != MEM
)
6416 if (reload_completed
6417 && (GET_CODE (ind
) == LABEL_REF
6418 || (GET_CODE (ind
) == CONST
6419 && GET_CODE (XEXP (ind
, 0)) == PLUS
6420 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
6421 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
6424 /* Match: (mem (reg)). */
6425 if (GET_CODE (ind
) == REG
)
6426 return arm_address_register_rtx_p (ind
, 0);
6428 /* Allow post-increment with Neon registers. */
6429 if (!core
&& GET_CODE (ind
) == POST_INC
)
6430 return arm_address_register_rtx_p (XEXP (ind
, 0), 0);
6433 /* FIXME: We can support this too if we use VLD1/VST1. */
6435 && GET_CODE (ind
) == POST_MODIFY
6436 && arm_address_register_rtx_p (XEXP (ind
, 0), 0)
6437 && GET_CODE (XEXP (ind
, 1)) == PLUS
6438 && rtx_equal_p (XEXP (XEXP (ind
, 1), 0), XEXP (ind
, 0)))
6439 ind
= XEXP (ind
, 1);
6446 && GET_CODE (ind
) == PLUS
6447 && GET_CODE (XEXP (ind
, 0)) == REG
6448 && REG_MODE_OK_FOR_BASE_P (XEXP (ind
, 0), VOIDmode
)
6449 && GET_CODE (XEXP (ind
, 1)) == CONST_INT
6450 && INTVAL (XEXP (ind
, 1)) > -1024
6451 && INTVAL (XEXP (ind
, 1)) < 1016
6452 && (INTVAL (XEXP (ind
, 1)) & 3) == 0)
6458 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6461 neon_struct_mem_operand (rtx op
)
6465 /* Reject eliminable registers. */
6466 if (! (reload_in_progress
|| reload_completed
)
6467 && ( reg_mentioned_p (frame_pointer_rtx
, op
)
6468 || reg_mentioned_p (arg_pointer_rtx
, op
)
6469 || reg_mentioned_p (virtual_incoming_args_rtx
, op
)
6470 || reg_mentioned_p (virtual_outgoing_args_rtx
, op
)
6471 || reg_mentioned_p (virtual_stack_dynamic_rtx
, op
)
6472 || reg_mentioned_p (virtual_stack_vars_rtx
, op
)))
6475 /* Constants are converted into offsets from labels. */
6476 if (GET_CODE (op
) != MEM
)
6481 if (reload_completed
6482 && (GET_CODE (ind
) == LABEL_REF
6483 || (GET_CODE (ind
) == CONST
6484 && GET_CODE (XEXP (ind
, 0)) == PLUS
6485 && GET_CODE (XEXP (XEXP (ind
, 0), 0)) == LABEL_REF
6486 && GET_CODE (XEXP (XEXP (ind
, 0), 1)) == CONST_INT
)))
6489 /* Match: (mem (reg)). */
6490 if (GET_CODE (ind
) == REG
)
6491 return arm_address_register_rtx_p (ind
, 0);
6496 /* Return true if X is a register that will be eliminated later on. */
6498 arm_eliminable_register (rtx x
)
6500 return REG_P (x
) && (REGNO (x
) == FRAME_POINTER_REGNUM
6501 || REGNO (x
) == ARG_POINTER_REGNUM
6502 || (REGNO (x
) >= FIRST_VIRTUAL_REGISTER
6503 && REGNO (x
) <= LAST_VIRTUAL_REGISTER
));
6506 /* Return GENERAL_REGS if a scratch register required to reload x to/from
6507 coprocessor registers. Otherwise return NO_REGS. */
6510 coproc_secondary_reload_class (enum machine_mode mode
, rtx x
, bool wb
)
6513 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6514 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6515 && neon_vector_mem_operand (x
, FALSE
))
6518 if (arm_coproc_mem_operand (x
, wb
) || s_register_operand (x
, mode
))
6521 return GENERAL_REGS
;
6524 /* Values which must be returned in the most-significant end of the return
6528 arm_return_in_msb (const_tree valtype
)
6530 return (TARGET_AAPCS_BASED
6532 && (AGGREGATE_TYPE_P (valtype
)
6533 || TREE_CODE (valtype
) == COMPLEX_TYPE
));
6536 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6537 Use by the Cirrus Maverick code which has to workaround
6538 a hardware bug triggered by such instructions. */
6540 arm_memory_load_p (rtx insn
)
6542 rtx body
, lhs
, rhs
;;
6544 if (insn
== NULL_RTX
|| GET_CODE (insn
) != INSN
)
6547 body
= PATTERN (insn
);
6549 if (GET_CODE (body
) != SET
)
6552 lhs
= XEXP (body
, 0);
6553 rhs
= XEXP (body
, 1);
6555 lhs
= REG_OR_SUBREG_RTX (lhs
);
6557 /* If the destination is not a general purpose
6558 register we do not have to worry. */
6559 if (GET_CODE (lhs
) != REG
6560 || REGNO_REG_CLASS (REGNO (lhs
)) != GENERAL_REGS
)
6563 /* As well as loads from memory we also have to react
6564 to loads of invalid constants which will be turned
6565 into loads from the minipool. */
6566 return (GET_CODE (rhs
) == MEM
6567 || GET_CODE (rhs
) == SYMBOL_REF
6568 || note_invalid_constants (insn
, -1, false));
6571 /* Return TRUE if INSN is a Cirrus instruction. */
6573 arm_cirrus_insn_p (rtx insn
)
6575 enum attr_cirrus attr
;
6577 /* get_attr cannot accept USE or CLOBBER. */
6579 || GET_CODE (insn
) != INSN
6580 || GET_CODE (PATTERN (insn
)) == USE
6581 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
6584 attr
= get_attr_cirrus (insn
);
6586 return attr
!= CIRRUS_NOT
;
6589 /* Cirrus reorg for invalid instruction combinations. */
6591 cirrus_reorg (rtx first
)
6593 enum attr_cirrus attr
;
6594 rtx body
= PATTERN (first
);
6598 /* Any branch must be followed by 2 non Cirrus instructions. */
6599 if (GET_CODE (first
) == JUMP_INSN
&& GET_CODE (body
) != RETURN
)
6602 t
= next_nonnote_insn (first
);
6604 if (arm_cirrus_insn_p (t
))
6607 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
6611 emit_insn_after (gen_nop (), first
);
6616 /* (float (blah)) is in parallel with a clobber. */
6617 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
6618 body
= XVECEXP (body
, 0, 0);
6620 if (GET_CODE (body
) == SET
)
6622 rtx lhs
= XEXP (body
, 0), rhs
= XEXP (body
, 1);
6624 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6625 be followed by a non Cirrus insn. */
6626 if (get_attr_cirrus (first
) == CIRRUS_DOUBLE
)
6628 if (arm_cirrus_insn_p (next_nonnote_insn (first
)))
6629 emit_insn_after (gen_nop (), first
);
6633 else if (arm_memory_load_p (first
))
6635 unsigned int arm_regno
;
6637 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6638 ldr/cfmv64hr combination where the Rd field is the same
6639 in both instructions must be split with a non Cirrus
6646 /* Get Arm register number for ldr insn. */
6647 if (GET_CODE (lhs
) == REG
)
6648 arm_regno
= REGNO (lhs
);
6651 gcc_assert (GET_CODE (rhs
) == REG
);
6652 arm_regno
= REGNO (rhs
);
6656 first
= next_nonnote_insn (first
);
6658 if (! arm_cirrus_insn_p (first
))
6661 body
= PATTERN (first
);
6663 /* (float (blah)) is in parallel with a clobber. */
6664 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0))
6665 body
= XVECEXP (body
, 0, 0);
6667 if (GET_CODE (body
) == FLOAT
)
6668 body
= XEXP (body
, 0);
6670 if (get_attr_cirrus (first
) == CIRRUS_MOVE
6671 && GET_CODE (XEXP (body
, 1)) == REG
6672 && arm_regno
== REGNO (XEXP (body
, 1)))
6673 emit_insn_after (gen_nop (), first
);
6679 /* get_attr cannot accept USE or CLOBBER. */
6681 || GET_CODE (first
) != INSN
6682 || GET_CODE (PATTERN (first
)) == USE
6683 || GET_CODE (PATTERN (first
)) == CLOBBER
)
6686 attr
= get_attr_cirrus (first
);
6688 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6689 must be followed by a non-coprocessor instruction. */
6690 if (attr
== CIRRUS_COMPARE
)
6694 t
= next_nonnote_insn (first
);
6696 if (arm_cirrus_insn_p (t
))
6699 if (arm_cirrus_insn_p (next_nonnote_insn (t
)))
6703 emit_insn_after (gen_nop (), first
);
6709 /* Return TRUE if X references a SYMBOL_REF. */
6711 symbol_mentioned_p (rtx x
)
6716 if (GET_CODE (x
) == SYMBOL_REF
)
6719 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6720 are constant offsets, not symbols. */
6721 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
6724 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
6726 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
6732 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
6733 if (symbol_mentioned_p (XVECEXP (x
, i
, j
)))
6736 else if (fmt
[i
] == 'e' && symbol_mentioned_p (XEXP (x
, i
)))
6743 /* Return TRUE if X references a LABEL_REF. */
6745 label_mentioned_p (rtx x
)
6750 if (GET_CODE (x
) == LABEL_REF
)
6753 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6754 instruction, but they are constant offsets, not symbols. */
6755 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
6758 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
6759 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
6765 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
6766 if (label_mentioned_p (XVECEXP (x
, i
, j
)))
6769 else if (fmt
[i
] == 'e' && label_mentioned_p (XEXP (x
, i
)))
6777 tls_mentioned_p (rtx x
)
6779 switch (GET_CODE (x
))
6782 return tls_mentioned_p (XEXP (x
, 0));
6785 if (XINT (x
, 1) == UNSPEC_TLS
)
6793 /* Must not copy a SET whose source operand is PC-relative. */
6796 arm_cannot_copy_insn_p (rtx insn
)
6798 rtx pat
= PATTERN (insn
);
6800 if (GET_CODE (pat
) == SET
)
6802 rtx rhs
= SET_SRC (pat
);
6804 if (GET_CODE (rhs
) == UNSPEC
6805 && XINT (rhs
, 1) == UNSPEC_PIC_BASE
)
6808 if (GET_CODE (rhs
) == MEM
6809 && GET_CODE (XEXP (rhs
, 0)) == UNSPEC
6810 && XINT (XEXP (rhs
, 0), 1) == UNSPEC_PIC_BASE
)
6820 enum rtx_code code
= GET_CODE (x
);
6837 /* Return 1 if memory locations are adjacent. */
6839 adjacent_mem_locations (rtx a
, rtx b
)
6841 /* We don't guarantee to preserve the order of these memory refs. */
6842 if (volatile_refs_p (a
) || volatile_refs_p (b
))
6845 if ((GET_CODE (XEXP (a
, 0)) == REG
6846 || (GET_CODE (XEXP (a
, 0)) == PLUS
6847 && GET_CODE (XEXP (XEXP (a
, 0), 1)) == CONST_INT
))
6848 && (GET_CODE (XEXP (b
, 0)) == REG
6849 || (GET_CODE (XEXP (b
, 0)) == PLUS
6850 && GET_CODE (XEXP (XEXP (b
, 0), 1)) == CONST_INT
)))
6852 HOST_WIDE_INT val0
= 0, val1
= 0;
6856 if (GET_CODE (XEXP (a
, 0)) == PLUS
)
6858 reg0
= XEXP (XEXP (a
, 0), 0);
6859 val0
= INTVAL (XEXP (XEXP (a
, 0), 1));
6864 if (GET_CODE (XEXP (b
, 0)) == PLUS
)
6866 reg1
= XEXP (XEXP (b
, 0), 0);
6867 val1
= INTVAL (XEXP (XEXP (b
, 0), 1));
6872 /* Don't accept any offset that will require multiple
6873 instructions to handle, since this would cause the
6874 arith_adjacentmem pattern to output an overlong sequence. */
6875 if (!const_ok_for_op (PLUS
, val0
) || !const_ok_for_op (PLUS
, val1
))
6878 /* Don't allow an eliminable register: register elimination can make
6879 the offset too large. */
6880 if (arm_eliminable_register (reg0
))
6883 val_diff
= val1
- val0
;
6887 /* If the target has load delay slots, then there's no benefit
6888 to using an ldm instruction unless the offset is zero and
6889 we are optimizing for size. */
6890 return (optimize_size
&& (REGNO (reg0
) == REGNO (reg1
))
6891 && (val0
== 0 || val1
== 0 || val0
== 4 || val1
== 4)
6892 && (val_diff
== 4 || val_diff
== -4));
6895 return ((REGNO (reg0
) == REGNO (reg1
))
6896 && (val_diff
== 4 || val_diff
== -4));
6903 load_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *base
,
6904 HOST_WIDE_INT
*load_offset
)
6906 int unsorted_regs
[4];
6907 HOST_WIDE_INT unsorted_offsets
[4];
6912 /* Can only handle 2, 3, or 4 insns at present,
6913 though could be easily extended if required. */
6914 gcc_assert (nops
>= 2 && nops
<= 4);
6916 /* Loop over the operands and check that the memory references are
6917 suitable (i.e. immediate offsets from the same base register). At
6918 the same time, extract the target register, and the memory
6920 for (i
= 0; i
< nops
; i
++)
6925 /* Convert a subreg of a mem into the mem itself. */
6926 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
6927 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
6929 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
6931 /* Don't reorder volatile memory references; it doesn't seem worth
6932 looking for the case where the order is ok anyway. */
6933 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
6936 offset
= const0_rtx
;
6938 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
6939 || (GET_CODE (reg
) == SUBREG
6940 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
6941 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
6942 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
6944 || (GET_CODE (reg
) == SUBREG
6945 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
6946 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
6951 base_reg
= REGNO (reg
);
6952 unsorted_regs
[0] = (GET_CODE (operands
[i
]) == REG
6953 ? REGNO (operands
[i
])
6954 : REGNO (SUBREG_REG (operands
[i
])));
6959 if (base_reg
!= (int) REGNO (reg
))
6960 /* Not addressed from the same base register. */
6963 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
6964 ? REGNO (operands
[i
])
6965 : REGNO (SUBREG_REG (operands
[i
])));
6966 if (unsorted_regs
[i
] < unsorted_regs
[order
[0]])
6970 /* If it isn't an integer register, or if it overwrites the
6971 base register but isn't the last insn in the list, then
6972 we can't do this. */
6973 if (unsorted_regs
[i
] < 0 || unsorted_regs
[i
] > 14
6974 || (i
!= nops
- 1 && unsorted_regs
[i
] == base_reg
))
6977 unsorted_offsets
[i
] = INTVAL (offset
);
6980 /* Not a suitable memory address. */
6984 /* All the useful information has now been extracted from the
6985 operands into unsorted_regs and unsorted_offsets; additionally,
6986 order[0] has been set to the lowest numbered register in the
6987 list. Sort the registers into order, and check that the memory
6988 offsets are ascending and adjacent. */
6990 for (i
= 1; i
< nops
; i
++)
6994 order
[i
] = order
[i
- 1];
6995 for (j
= 0; j
< nops
; j
++)
6996 if (unsorted_regs
[j
] > unsorted_regs
[order
[i
- 1]]
6997 && (order
[i
] == order
[i
- 1]
6998 || unsorted_regs
[j
] < unsorted_regs
[order
[i
]]))
7001 /* Have we found a suitable register? if not, one must be used more
7003 if (order
[i
] == order
[i
- 1])
7006 /* Is the memory address adjacent and ascending? */
7007 if (unsorted_offsets
[order
[i
]] != unsorted_offsets
[order
[i
- 1]] + 4)
7015 for (i
= 0; i
< nops
; i
++)
7016 regs
[i
] = unsorted_regs
[order
[i
]];
7018 *load_offset
= unsorted_offsets
[order
[0]];
7021 if (unsorted_offsets
[order
[0]] == 0)
7022 return 1; /* ldmia */
7024 if (TARGET_ARM
&& unsorted_offsets
[order
[0]] == 4)
7025 return 2; /* ldmib */
7027 if (TARGET_ARM
&& unsorted_offsets
[order
[nops
- 1]] == 0)
7028 return 3; /* ldmda */
7030 if (unsorted_offsets
[order
[nops
- 1]] == -4)
7031 return 4; /* ldmdb */
7033 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7034 if the offset isn't small enough. The reason 2 ldrs are faster
7035 is because these ARMs are able to do more than one cache access
7036 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7037 whilst the ARM8 has a double bandwidth cache. This means that
7038 these cores can do both an instruction fetch and a data fetch in
7039 a single cycle, so the trick of calculating the address into a
7040 scratch register (one of the result regs) and then doing a load
7041 multiple actually becomes slower (and no smaller in code size).
7042 That is the transformation
7044 ldr rd1, [rbase + offset]
7045 ldr rd2, [rbase + offset + 4]
7049 add rd1, rbase, offset
7050 ldmia rd1, {rd1, rd2}
7052 produces worse code -- '3 cycles + any stalls on rd2' instead of
7053 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7054 access per cycle, the first sequence could never complete in less
7055 than 6 cycles, whereas the ldm sequence would only take 5 and
7056 would make better use of sequential accesses if not hitting the
7059 We cheat here and test 'arm_ld_sched' which we currently know to
7060 only be true for the ARM8, ARM9 and StrongARM. If this ever
7061 changes, then the test below needs to be reworked. */
7062 if (nops
== 2 && arm_ld_sched
)
7065 /* Can't do it without setting up the offset, only do this if it takes
7066 no more than one insn. */
7067 return (const_ok_for_arm (unsorted_offsets
[order
[0]])
7068 || const_ok_for_arm (-unsorted_offsets
[order
[0]])) ? 5 : 0;
7072 emit_ldm_seq (rtx
*operands
, int nops
)
7076 HOST_WIDE_INT offset
;
7080 switch (load_multiple_sequence (operands
, nops
, regs
, &base_reg
, &offset
))
7083 strcpy (buf
, "ldm%(ia%)\t");
7087 strcpy (buf
, "ldm%(ib%)\t");
7091 strcpy (buf
, "ldm%(da%)\t");
7095 strcpy (buf
, "ldm%(db%)\t");
7100 sprintf (buf
, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX
,
7101 reg_names
[regs
[0]], REGISTER_PREFIX
, reg_names
[base_reg
],
7104 sprintf (buf
, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX
,
7105 reg_names
[regs
[0]], REGISTER_PREFIX
, reg_names
[base_reg
],
7107 output_asm_insn (buf
, operands
);
7109 strcpy (buf
, "ldm%(ia%)\t");
7116 sprintf (buf
+ strlen (buf
), "%s%s, {%s%s", REGISTER_PREFIX
,
7117 reg_names
[base_reg
], REGISTER_PREFIX
, reg_names
[regs
[0]]);
7119 for (i
= 1; i
< nops
; i
++)
7120 sprintf (buf
+ strlen (buf
), ", %s%s", REGISTER_PREFIX
,
7121 reg_names
[regs
[i
]]);
7123 strcat (buf
, "}\t%@ phole ldm");
7125 output_asm_insn (buf
, operands
);
7130 store_multiple_sequence (rtx
*operands
, int nops
, int *regs
, int *base
,
7131 HOST_WIDE_INT
* load_offset
)
7133 int unsorted_regs
[4];
7134 HOST_WIDE_INT unsorted_offsets
[4];
7139 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7140 extended if required. */
7141 gcc_assert (nops
>= 2 && nops
<= 4);
7143 /* Loop over the operands and check that the memory references are
7144 suitable (i.e. immediate offsets from the same base register). At
7145 the same time, extract the target register, and the memory
7147 for (i
= 0; i
< nops
; i
++)
7152 /* Convert a subreg of a mem into the mem itself. */
7153 if (GET_CODE (operands
[nops
+ i
]) == SUBREG
)
7154 operands
[nops
+ i
] = alter_subreg (operands
+ (nops
+ i
));
7156 gcc_assert (GET_CODE (operands
[nops
+ i
]) == MEM
);
7158 /* Don't reorder volatile memory references; it doesn't seem worth
7159 looking for the case where the order is ok anyway. */
7160 if (MEM_VOLATILE_P (operands
[nops
+ i
]))
7163 offset
= const0_rtx
;
7165 if ((GET_CODE (reg
= XEXP (operands
[nops
+ i
], 0)) == REG
7166 || (GET_CODE (reg
) == SUBREG
7167 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
7168 || (GET_CODE (XEXP (operands
[nops
+ i
], 0)) == PLUS
7169 && ((GET_CODE (reg
= XEXP (XEXP (operands
[nops
+ i
], 0), 0))
7171 || (GET_CODE (reg
) == SUBREG
7172 && GET_CODE (reg
= SUBREG_REG (reg
)) == REG
))
7173 && (GET_CODE (offset
= XEXP (XEXP (operands
[nops
+ i
], 0), 1))
7178 base_reg
= REGNO (reg
);
7179 unsorted_regs
[0] = (GET_CODE (operands
[i
]) == REG
7180 ? REGNO (operands
[i
])
7181 : REGNO (SUBREG_REG (operands
[i
])));
7186 if (base_reg
!= (int) REGNO (reg
))
7187 /* Not addressed from the same base register. */
7190 unsorted_regs
[i
] = (GET_CODE (operands
[i
]) == REG
7191 ? REGNO (operands
[i
])
7192 : REGNO (SUBREG_REG (operands
[i
])));
7193 if (unsorted_regs
[i
] < unsorted_regs
[order
[0]])
7197 /* If it isn't an integer register, then we can't do this. */
7198 if (unsorted_regs
[i
] < 0 || unsorted_regs
[i
] > 14)
7201 unsorted_offsets
[i
] = INTVAL (offset
);
7204 /* Not a suitable memory address. */
7208 /* All the useful information has now been extracted from the
7209 operands into unsorted_regs and unsorted_offsets; additionally,
7210 order[0] has been set to the lowest numbered register in the
7211 list. Sort the registers into order, and check that the memory
7212 offsets are ascending and adjacent. */
7214 for (i
= 1; i
< nops
; i
++)
7218 order
[i
] = order
[i
- 1];
7219 for (j
= 0; j
< nops
; j
++)
7220 if (unsorted_regs
[j
] > unsorted_regs
[order
[i
- 1]]
7221 && (order
[i
] == order
[i
- 1]
7222 || unsorted_regs
[j
] < unsorted_regs
[order
[i
]]))
7225 /* Have we found a suitable register? if not, one must be used more
7227 if (order
[i
] == order
[i
- 1])
7230 /* Is the memory address adjacent and ascending? */
7231 if (unsorted_offsets
[order
[i
]] != unsorted_offsets
[order
[i
- 1]] + 4)
7239 for (i
= 0; i
< nops
; i
++)
7240 regs
[i
] = unsorted_regs
[order
[i
]];
7242 *load_offset
= unsorted_offsets
[order
[0]];
7245 if (unsorted_offsets
[order
[0]] == 0)
7246 return 1; /* stmia */
7248 if (unsorted_offsets
[order
[0]] == 4)
7249 return 2; /* stmib */
7251 if (unsorted_offsets
[order
[nops
- 1]] == 0)
7252 return 3; /* stmda */
7254 if (unsorted_offsets
[order
[nops
- 1]] == -4)
7255 return 4; /* stmdb */
7261 emit_stm_seq (rtx
*operands
, int nops
)
7265 HOST_WIDE_INT offset
;
7269 switch (store_multiple_sequence (operands
, nops
, regs
, &base_reg
, &offset
))
7272 strcpy (buf
, "stm%(ia%)\t");
7276 strcpy (buf
, "stm%(ib%)\t");
7280 strcpy (buf
, "stm%(da%)\t");
7284 strcpy (buf
, "stm%(db%)\t");
7291 sprintf (buf
+ strlen (buf
), "%s%s, {%s%s", REGISTER_PREFIX
,
7292 reg_names
[base_reg
], REGISTER_PREFIX
, reg_names
[regs
[0]]);
7294 for (i
= 1; i
< nops
; i
++)
7295 sprintf (buf
+ strlen (buf
), ", %s%s", REGISTER_PREFIX
,
7296 reg_names
[regs
[i
]]);
7298 strcat (buf
, "}\t%@ phole stm");
7300 output_asm_insn (buf
, operands
);
7304 /* Routines for use in generating RTL. */
7307 arm_gen_load_multiple (int base_regno
, int count
, rtx from
, int up
,
7308 int write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
7310 HOST_WIDE_INT offset
= *offsetp
;
7313 int sign
= up
? 1 : -1;
7316 /* XScale has load-store double instructions, but they have stricter
7317 alignment requirements than load-store multiple, so we cannot
7320 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7321 the pipeline until completion.
7329 An ldr instruction takes 1-3 cycles, but does not block the
7338 Best case ldr will always win. However, the more ldr instructions
7339 we issue, the less likely we are to be able to schedule them well.
7340 Using ldr instructions also increases code size.
7342 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7343 for counts of 3 or 4 regs. */
7344 if (arm_tune_xscale
&& count
<= 2 && ! optimize_size
)
7350 for (i
= 0; i
< count
; i
++)
7352 addr
= plus_constant (from
, i
* 4 * sign
);
7353 mem
= adjust_automodify_address (basemem
, SImode
, addr
, offset
);
7354 emit_move_insn (gen_rtx_REG (SImode
, base_regno
+ i
), mem
);
7360 emit_move_insn (from
, plus_constant (from
, count
* 4 * sign
));
7370 result
= gen_rtx_PARALLEL (VOIDmode
,
7371 rtvec_alloc (count
+ (write_back
? 1 : 0)));
7374 XVECEXP (result
, 0, 0)
7375 = gen_rtx_SET (VOIDmode
, from
, plus_constant (from
, count
* 4 * sign
));
7380 for (j
= 0; i
< count
; i
++, j
++)
7382 addr
= plus_constant (from
, j
* 4 * sign
);
7383 mem
= adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
7384 XVECEXP (result
, 0, i
)
7385 = gen_rtx_SET (VOIDmode
, gen_rtx_REG (SImode
, base_regno
+ j
), mem
);
7396 arm_gen_store_multiple (int base_regno
, int count
, rtx to
, int up
,
7397 int write_back
, rtx basemem
, HOST_WIDE_INT
*offsetp
)
7399 HOST_WIDE_INT offset
= *offsetp
;
7402 int sign
= up
? 1 : -1;
7405 /* See arm_gen_load_multiple for discussion of
7406 the pros/cons of ldm/stm usage for XScale. */
7407 if (arm_tune_xscale
&& count
<= 2 && ! optimize_size
)
7413 for (i
= 0; i
< count
; i
++)
7415 addr
= plus_constant (to
, i
* 4 * sign
);
7416 mem
= adjust_automodify_address (basemem
, SImode
, addr
, offset
);
7417 emit_move_insn (mem
, gen_rtx_REG (SImode
, base_regno
+ i
));
7423 emit_move_insn (to
, plus_constant (to
, count
* 4 * sign
));
7433 result
= gen_rtx_PARALLEL (VOIDmode
,
7434 rtvec_alloc (count
+ (write_back
? 1 : 0)));
7437 XVECEXP (result
, 0, 0)
7438 = gen_rtx_SET (VOIDmode
, to
,
7439 plus_constant (to
, count
* 4 * sign
));
7444 for (j
= 0; i
< count
; i
++, j
++)
7446 addr
= plus_constant (to
, j
* 4 * sign
);
7447 mem
= adjust_automodify_address_nv (basemem
, SImode
, addr
, offset
);
7448 XVECEXP (result
, 0, i
)
7449 = gen_rtx_SET (VOIDmode
, mem
, gen_rtx_REG (SImode
, base_regno
+ j
));
7460 arm_gen_movmemqi (rtx
*operands
)
7462 HOST_WIDE_INT in_words_to_go
, out_words_to_go
, last_bytes
;
7463 HOST_WIDE_INT srcoffset
, dstoffset
;
7465 rtx src
, dst
, srcbase
, dstbase
;
7466 rtx part_bytes_reg
= NULL
;
7469 if (GET_CODE (operands
[2]) != CONST_INT
7470 || GET_CODE (operands
[3]) != CONST_INT
7471 || INTVAL (operands
[2]) > 64
7472 || INTVAL (operands
[3]) & 3)
7475 dstbase
= operands
[0];
7476 srcbase
= operands
[1];
7478 dst
= copy_to_mode_reg (SImode
, XEXP (dstbase
, 0));
7479 src
= copy_to_mode_reg (SImode
, XEXP (srcbase
, 0));
7481 in_words_to_go
= ARM_NUM_INTS (INTVAL (operands
[2]));
7482 out_words_to_go
= INTVAL (operands
[2]) / 4;
7483 last_bytes
= INTVAL (operands
[2]) & 3;
7484 dstoffset
= srcoffset
= 0;
7486 if (out_words_to_go
!= in_words_to_go
&& ((in_words_to_go
- 1) & 3) != 0)
7487 part_bytes_reg
= gen_rtx_REG (SImode
, (in_words_to_go
- 1) & 3);
7489 for (i
= 0; in_words_to_go
>= 2; i
+=4)
7491 if (in_words_to_go
> 4)
7492 emit_insn (arm_gen_load_multiple (0, 4, src
, TRUE
, TRUE
,
7493 srcbase
, &srcoffset
));
7495 emit_insn (arm_gen_load_multiple (0, in_words_to_go
, src
, TRUE
,
7496 FALSE
, srcbase
, &srcoffset
));
7498 if (out_words_to_go
)
7500 if (out_words_to_go
> 4)
7501 emit_insn (arm_gen_store_multiple (0, 4, dst
, TRUE
, TRUE
,
7502 dstbase
, &dstoffset
));
7503 else if (out_words_to_go
!= 1)
7504 emit_insn (arm_gen_store_multiple (0, out_words_to_go
,
7508 dstbase
, &dstoffset
));
7511 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
7512 emit_move_insn (mem
, gen_rtx_REG (SImode
, 0));
7513 if (last_bytes
!= 0)
7515 emit_insn (gen_addsi3 (dst
, dst
, GEN_INT (4)));
7521 in_words_to_go
-= in_words_to_go
< 4 ? in_words_to_go
: 4;
7522 out_words_to_go
-= out_words_to_go
< 4 ? out_words_to_go
: 4;
7525 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7526 if (out_words_to_go
)
7530 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
7531 sreg
= copy_to_reg (mem
);
7533 mem
= adjust_automodify_address (dstbase
, SImode
, dst
, dstoffset
);
7534 emit_move_insn (mem
, sreg
);
7537 gcc_assert (!in_words_to_go
); /* Sanity check */
7542 gcc_assert (in_words_to_go
> 0);
7544 mem
= adjust_automodify_address (srcbase
, SImode
, src
, srcoffset
);
7545 part_bytes_reg
= copy_to_mode_reg (SImode
, mem
);
7548 gcc_assert (!last_bytes
|| part_bytes_reg
);
7550 if (BYTES_BIG_ENDIAN
&& last_bytes
)
7552 rtx tmp
= gen_reg_rtx (SImode
);
7554 /* The bytes we want are in the top end of the word. */
7555 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
,
7556 GEN_INT (8 * (4 - last_bytes
))));
7557 part_bytes_reg
= tmp
;
7561 mem
= adjust_automodify_address (dstbase
, QImode
,
7562 plus_constant (dst
, last_bytes
- 1),
7563 dstoffset
+ last_bytes
- 1);
7564 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
7568 tmp
= gen_reg_rtx (SImode
);
7569 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (8)));
7570 part_bytes_reg
= tmp
;
7579 mem
= adjust_automodify_address (dstbase
, HImode
, dst
, dstoffset
);
7580 emit_move_insn (mem
, gen_lowpart (HImode
, part_bytes_reg
));
7584 rtx tmp
= gen_reg_rtx (SImode
);
7585 emit_insn (gen_addsi3 (dst
, dst
, const2_rtx
));
7586 emit_insn (gen_lshrsi3 (tmp
, part_bytes_reg
, GEN_INT (16)));
7587 part_bytes_reg
= tmp
;
7594 mem
= adjust_automodify_address (dstbase
, QImode
, dst
, dstoffset
);
7595 emit_move_insn (mem
, gen_lowpart (QImode
, part_bytes_reg
));
7602 /* Select a dominance comparison mode if possible for a test of the general
7603 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7604 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7605 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7606 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7607 In all cases OP will be either EQ or NE, but we don't need to know which
7608 here. If we are unable to support a dominance comparison we return
7609 CC mode. This will then fail to match for the RTL expressions that
7610 generate this call. */
7612 arm_select_dominance_cc_mode (rtx x
, rtx y
, HOST_WIDE_INT cond_or
)
7614 enum rtx_code cond1
, cond2
;
7617 /* Currently we will probably get the wrong result if the individual
7618 comparisons are not simple. This also ensures that it is safe to
7619 reverse a comparison if necessary. */
7620 if ((arm_select_cc_mode (cond1
= GET_CODE (x
), XEXP (x
, 0), XEXP (x
, 1))
7622 || (arm_select_cc_mode (cond2
= GET_CODE (y
), XEXP (y
, 0), XEXP (y
, 1))
7626 /* The if_then_else variant of this tests the second condition if the
7627 first passes, but is true if the first fails. Reverse the first
7628 condition to get a true "inclusive-or" expression. */
7629 if (cond_or
== DOM_CC_NX_OR_Y
)
7630 cond1
= reverse_condition (cond1
);
7632 /* If the comparisons are not equal, and one doesn't dominate the other,
7633 then we can't do this. */
7635 && !comparison_dominates_p (cond1
, cond2
)
7636 && (swapped
= 1, !comparison_dominates_p (cond2
, cond1
)))
7641 enum rtx_code temp
= cond1
;
7649 if (cond_or
== DOM_CC_X_AND_Y
)
7654 case EQ
: return CC_DEQmode
;
7655 case LE
: return CC_DLEmode
;
7656 case LEU
: return CC_DLEUmode
;
7657 case GE
: return CC_DGEmode
;
7658 case GEU
: return CC_DGEUmode
;
7659 default: gcc_unreachable ();
7663 if (cond_or
== DOM_CC_X_AND_Y
)
7679 if (cond_or
== DOM_CC_X_AND_Y
)
7695 if (cond_or
== DOM_CC_X_AND_Y
)
7711 if (cond_or
== DOM_CC_X_AND_Y
)
7726 /* The remaining cases only occur when both comparisons are the
7729 gcc_assert (cond1
== cond2
);
7733 gcc_assert (cond1
== cond2
);
7737 gcc_assert (cond1
== cond2
);
7741 gcc_assert (cond1
== cond2
);
7745 gcc_assert (cond1
== cond2
);
7754 arm_select_cc_mode (enum rtx_code op
, rtx x
, rtx y
)
7756 /* All floating point compares return CCFP if it is an equality
7757 comparison, and CCFPE otherwise. */
7758 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
7778 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
)
7787 /* A compare with a shifted operand. Because of canonicalization, the
7788 comparison will have to be swapped when we emit the assembler. */
7789 if (GET_MODE (y
) == SImode
&& GET_CODE (y
) == REG
7790 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
7791 || GET_CODE (x
) == LSHIFTRT
|| GET_CODE (x
) == ROTATE
7792 || GET_CODE (x
) == ROTATERT
))
7795 /* This operation is performed swapped, but since we only rely on the Z
7796 flag we don't need an additional mode. */
7797 if (GET_MODE (y
) == SImode
&& REG_P (y
)
7798 && GET_CODE (x
) == NEG
7799 && (op
== EQ
|| op
== NE
))
7802 /* This is a special case that is used by combine to allow a
7803 comparison of a shifted byte load to be split into a zero-extend
7804 followed by a comparison of the shifted integer (only valid for
7805 equalities and unsigned inequalities). */
7806 if (GET_MODE (x
) == SImode
7807 && GET_CODE (x
) == ASHIFT
7808 && GET_CODE (XEXP (x
, 1)) == CONST_INT
&& INTVAL (XEXP (x
, 1)) == 24
7809 && GET_CODE (XEXP (x
, 0)) == SUBREG
7810 && GET_CODE (SUBREG_REG (XEXP (x
, 0))) == MEM
7811 && GET_MODE (SUBREG_REG (XEXP (x
, 0))) == QImode
7812 && (op
== EQ
|| op
== NE
7813 || op
== GEU
|| op
== GTU
|| op
== LTU
|| op
== LEU
)
7814 && GET_CODE (y
) == CONST_INT
)
7817 /* A construct for a conditional compare, if the false arm contains
7818 0, then both conditions must be true, otherwise either condition
7819 must be true. Not all conditions are possible, so CCmode is
7820 returned if it can't be done. */
7821 if (GET_CODE (x
) == IF_THEN_ELSE
7822 && (XEXP (x
, 2) == const0_rtx
7823 || XEXP (x
, 2) == const1_rtx
)
7824 && COMPARISON_P (XEXP (x
, 0))
7825 && COMPARISON_P (XEXP (x
, 1)))
7826 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
7827 INTVAL (XEXP (x
, 2)));
7829 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7830 if (GET_CODE (x
) == AND
7831 && COMPARISON_P (XEXP (x
, 0))
7832 && COMPARISON_P (XEXP (x
, 1)))
7833 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
7836 if (GET_CODE (x
) == IOR
7837 && COMPARISON_P (XEXP (x
, 0))
7838 && COMPARISON_P (XEXP (x
, 1)))
7839 return arm_select_dominance_cc_mode (XEXP (x
, 0), XEXP (x
, 1),
7842 /* An operation (on Thumb) where we want to test for a single bit.
7843 This is done by shifting that bit up into the top bit of a
7844 scratch register; we can then branch on the sign bit. */
7846 && GET_MODE (x
) == SImode
7847 && (op
== EQ
|| op
== NE
)
7848 && GET_CODE (x
) == ZERO_EXTRACT
7849 && XEXP (x
, 1) == const1_rtx
)
7852 /* An operation that sets the condition codes as a side-effect, the
7853 V flag is not set correctly, so we can only use comparisons where
7854 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7856 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7857 if (GET_MODE (x
) == SImode
7859 && (op
== EQ
|| op
== NE
|| op
== LT
|| op
== GE
)
7860 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
7861 || GET_CODE (x
) == AND
|| GET_CODE (x
) == IOR
7862 || GET_CODE (x
) == XOR
|| GET_CODE (x
) == MULT
7863 || GET_CODE (x
) == NOT
|| GET_CODE (x
) == NEG
7864 || GET_CODE (x
) == LSHIFTRT
7865 || GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
7866 || GET_CODE (x
) == ROTATERT
7867 || (TARGET_32BIT
&& GET_CODE (x
) == ZERO_EXTRACT
)))
7870 if (GET_MODE (x
) == QImode
&& (op
== EQ
|| op
== NE
))
7873 if (GET_MODE (x
) == SImode
&& (op
== LTU
|| op
== GEU
)
7874 && GET_CODE (x
) == PLUS
7875 && (rtx_equal_p (XEXP (x
, 0), y
) || rtx_equal_p (XEXP (x
, 1), y
)))
7881 /* X and Y are two things to compare using CODE. Emit the compare insn and
7882 return the rtx for register 0 in the proper mode. FP means this is a
7883 floating point compare: I don't think that it is needed on the arm. */
7885 arm_gen_compare_reg (enum rtx_code code
, rtx x
, rtx y
)
7887 enum machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
7888 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
7890 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
7895 /* Generate a sequence of insns that will generate the correct return
7896 address mask depending on the physical architecture that the program
7899 arm_gen_return_addr_mask (void)
7901 rtx reg
= gen_reg_rtx (Pmode
);
7903 emit_insn (gen_return_addr_mask (reg
));
7908 arm_reload_in_hi (rtx
*operands
)
7910 rtx ref
= operands
[1];
7912 HOST_WIDE_INT offset
= 0;
7914 if (GET_CODE (ref
) == SUBREG
)
7916 offset
= SUBREG_BYTE (ref
);
7917 ref
= SUBREG_REG (ref
);
7920 if (GET_CODE (ref
) == REG
)
7922 /* We have a pseudo which has been spilt onto the stack; there
7923 are two cases here: the first where there is a simple
7924 stack-slot replacement and a second where the stack-slot is
7925 out of range, or is used as a subreg. */
7926 if (reg_equiv_mem
[REGNO (ref
)])
7928 ref
= reg_equiv_mem
[REGNO (ref
)];
7929 base
= find_replacement (&XEXP (ref
, 0));
7932 /* The slot is out of range, or was dressed up in a SUBREG. */
7933 base
= reg_equiv_address
[REGNO (ref
)];
7936 base
= find_replacement (&XEXP (ref
, 0));
7938 /* Handle the case where the address is too complex to be offset by 1. */
7939 if (GET_CODE (base
) == MINUS
7940 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
7942 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
7944 emit_set_insn (base_plus
, base
);
7947 else if (GET_CODE (base
) == PLUS
)
7949 /* The addend must be CONST_INT, or we would have dealt with it above. */
7950 HOST_WIDE_INT hi
, lo
;
7952 offset
+= INTVAL (XEXP (base
, 1));
7953 base
= XEXP (base
, 0);
7955 /* Rework the address into a legal sequence of insns. */
7956 /* Valid range for lo is -4095 -> 4095 */
7959 : -((-offset
) & 0xfff));
7961 /* Corner case, if lo is the max offset then we would be out of range
7962 once we have added the additional 1 below, so bump the msb into the
7963 pre-loading insn(s). */
7967 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
7968 ^ (HOST_WIDE_INT
) 0x80000000)
7969 - (HOST_WIDE_INT
) 0x80000000);
7971 gcc_assert (hi
+ lo
== offset
);
7975 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
7977 /* Get the base address; addsi3 knows how to handle constants
7978 that require more than one insn. */
7979 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
7985 /* Operands[2] may overlap operands[0] (though it won't overlap
7986 operands[1]), that's why we asked for a DImode reg -- so we can
7987 use the bit that does not overlap. */
7988 if (REGNO (operands
[2]) == REGNO (operands
[0]))
7989 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
7991 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
7993 emit_insn (gen_zero_extendqisi2 (scratch
,
7994 gen_rtx_MEM (QImode
,
7995 plus_constant (base
,
7997 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode
, operands
[0], 0),
7998 gen_rtx_MEM (QImode
,
7999 plus_constant (base
,
8001 if (!BYTES_BIG_ENDIAN
)
8002 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
8003 gen_rtx_IOR (SImode
,
8006 gen_rtx_SUBREG (SImode
, operands
[0], 0),
8010 emit_set_insn (gen_rtx_SUBREG (SImode
, operands
[0], 0),
8011 gen_rtx_IOR (SImode
,
8012 gen_rtx_ASHIFT (SImode
, scratch
,
8014 gen_rtx_SUBREG (SImode
, operands
[0], 0)));
8017 /* Handle storing a half-word to memory during reload by synthesizing as two
8018 byte stores. Take care not to clobber the input values until after we
8019 have moved them somewhere safe. This code assumes that if the DImode
8020 scratch in operands[2] overlaps either the input value or output address
8021 in some way, then that value must die in this insn (we absolutely need
8022 two scratch registers for some corner cases). */
8024 arm_reload_out_hi (rtx
*operands
)
8026 rtx ref
= operands
[0];
8027 rtx outval
= operands
[1];
8029 HOST_WIDE_INT offset
= 0;
8031 if (GET_CODE (ref
) == SUBREG
)
8033 offset
= SUBREG_BYTE (ref
);
8034 ref
= SUBREG_REG (ref
);
8037 if (GET_CODE (ref
) == REG
)
8039 /* We have a pseudo which has been spilt onto the stack; there
8040 are two cases here: the first where there is a simple
8041 stack-slot replacement and a second where the stack-slot is
8042 out of range, or is used as a subreg. */
8043 if (reg_equiv_mem
[REGNO (ref
)])
8045 ref
= reg_equiv_mem
[REGNO (ref
)];
8046 base
= find_replacement (&XEXP (ref
, 0));
8049 /* The slot is out of range, or was dressed up in a SUBREG. */
8050 base
= reg_equiv_address
[REGNO (ref
)];
8053 base
= find_replacement (&XEXP (ref
, 0));
8055 scratch
= gen_rtx_REG (SImode
, REGNO (operands
[2]));
8057 /* Handle the case where the address is too complex to be offset by 1. */
8058 if (GET_CODE (base
) == MINUS
8059 || (GET_CODE (base
) == PLUS
&& GET_CODE (XEXP (base
, 1)) != CONST_INT
))
8061 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
8063 /* Be careful not to destroy OUTVAL. */
8064 if (reg_overlap_mentioned_p (base_plus
, outval
))
8066 /* Updating base_plus might destroy outval, see if we can
8067 swap the scratch and base_plus. */
8068 if (!reg_overlap_mentioned_p (scratch
, outval
))
8071 scratch
= base_plus
;
8076 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
8078 /* Be conservative and copy OUTVAL into the scratch now,
8079 this should only be necessary if outval is a subreg
8080 of something larger than a word. */
8081 /* XXX Might this clobber base? I can't see how it can,
8082 since scratch is known to overlap with OUTVAL, and
8083 must be wider than a word. */
8084 emit_insn (gen_movhi (scratch_hi
, outval
));
8085 outval
= scratch_hi
;
8089 emit_set_insn (base_plus
, base
);
8092 else if (GET_CODE (base
) == PLUS
)
8094 /* The addend must be CONST_INT, or we would have dealt with it above. */
8095 HOST_WIDE_INT hi
, lo
;
8097 offset
+= INTVAL (XEXP (base
, 1));
8098 base
= XEXP (base
, 0);
8100 /* Rework the address into a legal sequence of insns. */
8101 /* Valid range for lo is -4095 -> 4095 */
8104 : -((-offset
) & 0xfff));
8106 /* Corner case, if lo is the max offset then we would be out of range
8107 once we have added the additional 1 below, so bump the msb into the
8108 pre-loading insn(s). */
8112 hi
= ((((offset
- lo
) & (HOST_WIDE_INT
) 0xffffffff)
8113 ^ (HOST_WIDE_INT
) 0x80000000)
8114 - (HOST_WIDE_INT
) 0x80000000);
8116 gcc_assert (hi
+ lo
== offset
);
8120 rtx base_plus
= gen_rtx_REG (SImode
, REGNO (operands
[2]) + 1);
8122 /* Be careful not to destroy OUTVAL. */
8123 if (reg_overlap_mentioned_p (base_plus
, outval
))
8125 /* Updating base_plus might destroy outval, see if we
8126 can swap the scratch and base_plus. */
8127 if (!reg_overlap_mentioned_p (scratch
, outval
))
8130 scratch
= base_plus
;
8135 rtx scratch_hi
= gen_rtx_REG (HImode
, REGNO (operands
[2]));
8137 /* Be conservative and copy outval into scratch now,
8138 this should only be necessary if outval is a
8139 subreg of something larger than a word. */
8140 /* XXX Might this clobber base? I can't see how it
8141 can, since scratch is known to overlap with
8143 emit_insn (gen_movhi (scratch_hi
, outval
));
8144 outval
= scratch_hi
;
8148 /* Get the base address; addsi3 knows how to handle constants
8149 that require more than one insn. */
8150 emit_insn (gen_addsi3 (base_plus
, base
, GEN_INT (hi
)));
8156 if (BYTES_BIG_ENDIAN
)
8158 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
8159 plus_constant (base
, offset
+ 1)),
8160 gen_lowpart (QImode
, outval
)));
8161 emit_insn (gen_lshrsi3 (scratch
,
8162 gen_rtx_SUBREG (SImode
, outval
, 0),
8164 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
8165 gen_lowpart (QImode
, scratch
)));
8169 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (base
, offset
)),
8170 gen_lowpart (QImode
, outval
)));
8171 emit_insn (gen_lshrsi3 (scratch
,
8172 gen_rtx_SUBREG (SImode
, outval
, 0),
8174 emit_insn (gen_movqi (gen_rtx_MEM (QImode
,
8175 plus_constant (base
, offset
+ 1)),
8176 gen_lowpart (QImode
, scratch
)));
8180 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8181 (padded to the size of a word) should be passed in a register. */
8184 arm_must_pass_in_stack (enum machine_mode mode
, const_tree type
)
8186 if (TARGET_AAPCS_BASED
)
8187 return must_pass_in_stack_var_size (mode
, type
);
8189 return must_pass_in_stack_var_size_or_pad (mode
, type
);
8193 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8194 Return true if an argument passed on the stack should be padded upwards,
8195 i.e. if the least-significant byte has useful data.
8196 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8197 aggregate types are placed in the lowest memory address. */
8200 arm_pad_arg_upward (enum machine_mode mode
, const_tree type
)
8202 if (!TARGET_AAPCS_BASED
)
8203 return DEFAULT_FUNCTION_ARG_PADDING(mode
, type
) == upward
;
8205 if (type
&& BYTES_BIG_ENDIAN
&& INTEGRAL_TYPE_P (type
))
8212 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8213 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8214 byte of the register has useful data, and return the opposite if the
8215 most significant byte does.
8216 For AAPCS, small aggregates and small complex types are always padded
8220 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED
,
8221 tree type
, int first ATTRIBUTE_UNUSED
)
8223 if (TARGET_AAPCS_BASED
8225 && (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
)
8226 && int_size_in_bytes (type
) <= 4)
8229 /* Otherwise, use default padding. */
8230 return !BYTES_BIG_ENDIAN
;
8234 /* Print a symbolic form of X to the debug file, F. */
8236 arm_print_value (FILE *f
, rtx x
)
8238 switch (GET_CODE (x
))
8241 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (x
));
8245 fprintf (f
, "<0x%lx,0x%lx>", (long)XWINT (x
, 2), (long)XWINT (x
, 3));
8253 for (i
= 0; i
< CONST_VECTOR_NUNITS (x
); i
++)
8255 fprintf (f
, HOST_WIDE_INT_PRINT_HEX
, INTVAL (CONST_VECTOR_ELT (x
, i
)));
8256 if (i
< (CONST_VECTOR_NUNITS (x
) - 1))
8264 fprintf (f
, "\"%s\"", XSTR (x
, 0));
8268 fprintf (f
, "`%s'", XSTR (x
, 0));
8272 fprintf (f
, "L%d", INSN_UID (XEXP (x
, 0)));
8276 arm_print_value (f
, XEXP (x
, 0));
8280 arm_print_value (f
, XEXP (x
, 0));
8282 arm_print_value (f
, XEXP (x
, 1));
8290 fprintf (f
, "????");
8295 /* Routines for manipulation of the constant pool. */
8297 /* Arm instructions cannot load a large constant directly into a
8298 register; they have to come from a pc relative load. The constant
8299 must therefore be placed in the addressable range of the pc
8300 relative load. Depending on the precise pc relative load
8301 instruction the range is somewhere between 256 bytes and 4k. This
8302 means that we often have to dump a constant inside a function, and
8303 generate code to branch around it.
8305 It is important to minimize this, since the branches will slow
8306 things down and make the code larger.
8308 Normally we can hide the table after an existing unconditional
8309 branch so that there is no interruption of the flow, but in the
8310 worst case the code looks like this:
8328 We fix this by performing a scan after scheduling, which notices
8329 which instructions need to have their operands fetched from the
8330 constant table and builds the table.
8332 The algorithm starts by building a table of all the constants that
8333 need fixing up and all the natural barriers in the function (places
8334 where a constant table can be dropped without breaking the flow).
8335 For each fixup we note how far the pc-relative replacement will be
8336 able to reach and the offset of the instruction into the function.
8338 Having built the table we then group the fixes together to form
8339 tables that are as large as possible (subject to addressing
8340 constraints) and emit each table of constants after the last
8341 barrier that is within range of all the instructions in the group.
8342 If a group does not contain a barrier, then we forcibly create one
8343 by inserting a jump instruction into the flow. Once the table has
8344 been inserted, the insns are then modified to reference the
8345 relevant entry in the pool.
8347 Possible enhancements to the algorithm (not implemented) are:
8349 1) For some processors and object formats, there may be benefit in
8350 aligning the pools to the start of cache lines; this alignment
8351 would need to be taken into account when calculating addressability
8354 /* These typedefs are located at the start of this file, so that
8355 they can be used in the prototypes there. This comment is to
8356 remind readers of that fact so that the following structures
8357 can be understood more easily.
8359 typedef struct minipool_node Mnode;
8360 typedef struct minipool_fixup Mfix; */
8362 struct minipool_node
8364 /* Doubly linked chain of entries. */
8367 /* The maximum offset into the code that this entry can be placed. While
8368 pushing fixes for forward references, all entries are sorted in order
8369 of increasing max_address. */
8370 HOST_WIDE_INT max_address
;
8371 /* Similarly for an entry inserted for a backwards ref. */
8372 HOST_WIDE_INT min_address
;
8373 /* The number of fixes referencing this entry. This can become zero
8374 if we "unpush" an entry. In this case we ignore the entry when we
8375 come to emit the code. */
8377 /* The offset from the start of the minipool. */
8378 HOST_WIDE_INT offset
;
8379 /* The value in table. */
8381 /* The mode of value. */
8382 enum machine_mode mode
;
8383 /* The size of the value. With iWMMXt enabled
8384 sizes > 4 also imply an alignment of 8-bytes. */
8388 struct minipool_fixup
8392 HOST_WIDE_INT address
;
8394 enum machine_mode mode
;
8398 HOST_WIDE_INT forwards
;
8399 HOST_WIDE_INT backwards
;
8402 /* Fixes less than a word need padding out to a word boundary. */
8403 #define MINIPOOL_FIX_SIZE(mode) \
8404 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
8406 static Mnode
* minipool_vector_head
;
8407 static Mnode
* minipool_vector_tail
;
8408 static rtx minipool_vector_label
;
8409 static int minipool_pad
;
8411 /* The linked list of all minipool fixes required for this function. */
8412 Mfix
* minipool_fix_head
;
8413 Mfix
* minipool_fix_tail
;
8414 /* The fix entry for the current minipool, once it has been placed. */
8415 Mfix
* minipool_barrier
;
8417 /* Determines if INSN is the start of a jump table. Returns the end
8418 of the TABLE or NULL_RTX. */
8420 is_jump_table (rtx insn
)
8424 if (GET_CODE (insn
) == JUMP_INSN
8425 && JUMP_LABEL (insn
) != NULL
8426 && ((table
= next_real_insn (JUMP_LABEL (insn
)))
8427 == next_real_insn (insn
))
8429 && GET_CODE (table
) == JUMP_INSN
8430 && (GET_CODE (PATTERN (table
)) == ADDR_VEC
8431 || GET_CODE (PATTERN (table
)) == ADDR_DIFF_VEC
))
8437 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8438 #define JUMP_TABLES_IN_TEXT_SECTION 0
8441 static HOST_WIDE_INT
8442 get_jump_table_size (rtx insn
)
8444 /* ADDR_VECs only take room if read-only data does into the text
8446 if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section
== text_section
)
8448 rtx body
= PATTERN (insn
);
8449 int elt
= GET_CODE (body
) == ADDR_DIFF_VEC
? 1 : 0;
8451 HOST_WIDE_INT modesize
;
8453 modesize
= GET_MODE_SIZE (GET_MODE (body
));
8454 size
= modesize
* XVECLEN (body
, elt
);
8458 /* Round up size of TBB table to a halfword boundary. */
8459 size
= (size
+ 1) & ~(HOST_WIDE_INT
)1;
8462 /* No padding necessary for TBH. */
8465 /* Add two bytes for alignment on Thumb. */
8478 /* Move a minipool fix MP from its current location to before MAX_MP.
8479 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8480 constraints may need updating. */
8482 move_minipool_fix_forward_ref (Mnode
*mp
, Mnode
*max_mp
,
8483 HOST_WIDE_INT max_address
)
8485 /* The code below assumes these are different. */
8486 gcc_assert (mp
!= max_mp
);
8490 if (max_address
< mp
->max_address
)
8491 mp
->max_address
= max_address
;
8495 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
8496 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
8498 mp
->max_address
= max_address
;
8500 /* Unlink MP from its current position. Since max_mp is non-null,
8501 mp->prev must be non-null. */
8502 mp
->prev
->next
= mp
->next
;
8503 if (mp
->next
!= NULL
)
8504 mp
->next
->prev
= mp
->prev
;
8506 minipool_vector_tail
= mp
->prev
;
8508 /* Re-insert it before MAX_MP. */
8510 mp
->prev
= max_mp
->prev
;
8513 if (mp
->prev
!= NULL
)
8514 mp
->prev
->next
= mp
;
8516 minipool_vector_head
= mp
;
8519 /* Save the new entry. */
8522 /* Scan over the preceding entries and adjust their addresses as
8524 while (mp
->prev
!= NULL
8525 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
8527 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
8534 /* Add a constant to the minipool for a forward reference. Returns the
8535 node added or NULL if the constant will not fit in this pool. */
8537 add_minipool_forward_ref (Mfix
*fix
)
8539 /* If set, max_mp is the first pool_entry that has a lower
8540 constraint than the one we are trying to add. */
8541 Mnode
* max_mp
= NULL
;
8542 HOST_WIDE_INT max_address
= fix
->address
+ fix
->forwards
- minipool_pad
;
8545 /* If the minipool starts before the end of FIX->INSN then this FIX
8546 can not be placed into the current pool. Furthermore, adding the
8547 new constant pool entry may cause the pool to start FIX_SIZE bytes
8549 if (minipool_vector_head
&&
8550 (fix
->address
+ get_attr_length (fix
->insn
)
8551 >= minipool_vector_head
->max_address
- fix
->fix_size
))
8554 /* Scan the pool to see if a constant with the same value has
8555 already been added. While we are doing this, also note the
8556 location where we must insert the constant if it doesn't already
8558 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
8560 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
8561 && fix
->mode
== mp
->mode
8562 && (GET_CODE (fix
->value
) != CODE_LABEL
8563 || (CODE_LABEL_NUMBER (fix
->value
)
8564 == CODE_LABEL_NUMBER (mp
->value
)))
8565 && rtx_equal_p (fix
->value
, mp
->value
))
8567 /* More than one fix references this entry. */
8569 return move_minipool_fix_forward_ref (mp
, max_mp
, max_address
);
8572 /* Note the insertion point if necessary. */
8574 && mp
->max_address
> max_address
)
8577 /* If we are inserting an 8-bytes aligned quantity and
8578 we have not already found an insertion point, then
8579 make sure that all such 8-byte aligned quantities are
8580 placed at the start of the pool. */
8581 if (ARM_DOUBLEWORD_ALIGN
8583 && fix
->fix_size
>= 8
8584 && mp
->fix_size
< 8)
8587 max_address
= mp
->max_address
;
8591 /* The value is not currently in the minipool, so we need to create
8592 a new entry for it. If MAX_MP is NULL, the entry will be put on
8593 the end of the list since the placement is less constrained than
8594 any existing entry. Otherwise, we insert the new fix before
8595 MAX_MP and, if necessary, adjust the constraints on the other
8598 mp
->fix_size
= fix
->fix_size
;
8599 mp
->mode
= fix
->mode
;
8600 mp
->value
= fix
->value
;
8602 /* Not yet required for a backwards ref. */
8603 mp
->min_address
= -65536;
8607 mp
->max_address
= max_address
;
8609 mp
->prev
= minipool_vector_tail
;
8611 if (mp
->prev
== NULL
)
8613 minipool_vector_head
= mp
;
8614 minipool_vector_label
= gen_label_rtx ();
8617 mp
->prev
->next
= mp
;
8619 minipool_vector_tail
= mp
;
8623 if (max_address
> max_mp
->max_address
- mp
->fix_size
)
8624 mp
->max_address
= max_mp
->max_address
- mp
->fix_size
;
8626 mp
->max_address
= max_address
;
8629 mp
->prev
= max_mp
->prev
;
8631 if (mp
->prev
!= NULL
)
8632 mp
->prev
->next
= mp
;
8634 minipool_vector_head
= mp
;
8637 /* Save the new entry. */
8640 /* Scan over the preceding entries and adjust their addresses as
8642 while (mp
->prev
!= NULL
8643 && mp
->prev
->max_address
> mp
->max_address
- mp
->prev
->fix_size
)
8645 mp
->prev
->max_address
= mp
->max_address
- mp
->prev
->fix_size
;
8653 move_minipool_fix_backward_ref (Mnode
*mp
, Mnode
*min_mp
,
8654 HOST_WIDE_INT min_address
)
8656 HOST_WIDE_INT offset
;
8658 /* The code below assumes these are different. */
8659 gcc_assert (mp
!= min_mp
);
8663 if (min_address
> mp
->min_address
)
8664 mp
->min_address
= min_address
;
8668 /* We will adjust this below if it is too loose. */
8669 mp
->min_address
= min_address
;
8671 /* Unlink MP from its current position. Since min_mp is non-null,
8672 mp->next must be non-null. */
8673 mp
->next
->prev
= mp
->prev
;
8674 if (mp
->prev
!= NULL
)
8675 mp
->prev
->next
= mp
->next
;
8677 minipool_vector_head
= mp
->next
;
8679 /* Reinsert it after MIN_MP. */
8681 mp
->next
= min_mp
->next
;
8683 if (mp
->next
!= NULL
)
8684 mp
->next
->prev
= mp
;
8686 minipool_vector_tail
= mp
;
8692 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
8694 mp
->offset
= offset
;
8695 if (mp
->refcount
> 0)
8696 offset
+= mp
->fix_size
;
8698 if (mp
->next
&& mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
8699 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
8705 /* Add a constant to the minipool for a backward reference. Returns the
8706 node added or NULL if the constant will not fit in this pool.
8708 Note that the code for insertion for a backwards reference can be
8709 somewhat confusing because the calculated offsets for each fix do
8710 not take into account the size of the pool (which is still under
8713 add_minipool_backward_ref (Mfix
*fix
)
8715 /* If set, min_mp is the last pool_entry that has a lower constraint
8716 than the one we are trying to add. */
8717 Mnode
*min_mp
= NULL
;
8718 /* This can be negative, since it is only a constraint. */
8719 HOST_WIDE_INT min_address
= fix
->address
- fix
->backwards
;
8722 /* If we can't reach the current pool from this insn, or if we can't
8723 insert this entry at the end of the pool without pushing other
8724 fixes out of range, then we don't try. This ensures that we
8725 can't fail later on. */
8726 if (min_address
>= minipool_barrier
->address
8727 || (minipool_vector_tail
->min_address
+ fix
->fix_size
8728 >= minipool_barrier
->address
))
8731 /* Scan the pool to see if a constant with the same value has
8732 already been added. While we are doing this, also note the
8733 location where we must insert the constant if it doesn't already
8735 for (mp
= minipool_vector_tail
; mp
!= NULL
; mp
= mp
->prev
)
8737 if (GET_CODE (fix
->value
) == GET_CODE (mp
->value
)
8738 && fix
->mode
== mp
->mode
8739 && (GET_CODE (fix
->value
) != CODE_LABEL
8740 || (CODE_LABEL_NUMBER (fix
->value
)
8741 == CODE_LABEL_NUMBER (mp
->value
)))
8742 && rtx_equal_p (fix
->value
, mp
->value
)
8743 /* Check that there is enough slack to move this entry to the
8744 end of the table (this is conservative). */
8746 > (minipool_barrier
->address
8747 + minipool_vector_tail
->offset
8748 + minipool_vector_tail
->fix_size
)))
8751 return move_minipool_fix_backward_ref (mp
, min_mp
, min_address
);
8755 mp
->min_address
+= fix
->fix_size
;
8758 /* Note the insertion point if necessary. */
8759 if (mp
->min_address
< min_address
)
8761 /* For now, we do not allow the insertion of 8-byte alignment
8762 requiring nodes anywhere but at the start of the pool. */
8763 if (ARM_DOUBLEWORD_ALIGN
8764 && fix
->fix_size
>= 8 && mp
->fix_size
< 8)
8769 else if (mp
->max_address
8770 < minipool_barrier
->address
+ mp
->offset
+ fix
->fix_size
)
8772 /* Inserting before this entry would push the fix beyond
8773 its maximum address (which can happen if we have
8774 re-located a forwards fix); force the new fix to come
8777 min_address
= mp
->min_address
+ fix
->fix_size
;
8779 /* If we are inserting an 8-bytes aligned quantity and
8780 we have not already found an insertion point, then
8781 make sure that all such 8-byte aligned quantities are
8782 placed at the start of the pool. */
8783 else if (ARM_DOUBLEWORD_ALIGN
8785 && fix
->fix_size
>= 8
8786 && mp
->fix_size
< 8)
8789 min_address
= mp
->min_address
+ fix
->fix_size
;
8794 /* We need to create a new entry. */
8796 mp
->fix_size
= fix
->fix_size
;
8797 mp
->mode
= fix
->mode
;
8798 mp
->value
= fix
->value
;
8800 mp
->max_address
= minipool_barrier
->address
+ 65536;
8802 mp
->min_address
= min_address
;
8807 mp
->next
= minipool_vector_head
;
8809 if (mp
->next
== NULL
)
8811 minipool_vector_tail
= mp
;
8812 minipool_vector_label
= gen_label_rtx ();
8815 mp
->next
->prev
= mp
;
8817 minipool_vector_head
= mp
;
8821 mp
->next
= min_mp
->next
;
8825 if (mp
->next
!= NULL
)
8826 mp
->next
->prev
= mp
;
8828 minipool_vector_tail
= mp
;
8831 /* Save the new entry. */
8839 /* Scan over the following entries and adjust their offsets. */
8840 while (mp
->next
!= NULL
)
8842 if (mp
->next
->min_address
< mp
->min_address
+ mp
->fix_size
)
8843 mp
->next
->min_address
= mp
->min_address
+ mp
->fix_size
;
8846 mp
->next
->offset
= mp
->offset
+ mp
->fix_size
;
8848 mp
->next
->offset
= mp
->offset
;
8857 assign_minipool_offsets (Mfix
*barrier
)
8859 HOST_WIDE_INT offset
= 0;
8862 minipool_barrier
= barrier
;
8864 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
8866 mp
->offset
= offset
;
8868 if (mp
->refcount
> 0)
8869 offset
+= mp
->fix_size
;
8873 /* Output the literal table */
8875 dump_minipool (rtx scan
)
8881 if (ARM_DOUBLEWORD_ALIGN
)
8882 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= mp
->next
)
8883 if (mp
->refcount
> 0 && mp
->fix_size
>= 8)
8891 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8892 INSN_UID (scan
), (unsigned long) minipool_barrier
->address
, align64
? 8 : 4);
8894 scan
= emit_label_after (gen_label_rtx (), scan
);
8895 scan
= emit_insn_after (align64
? gen_align_8 () : gen_align_4 (), scan
);
8896 scan
= emit_label_after (minipool_vector_label
, scan
);
8898 for (mp
= minipool_vector_head
; mp
!= NULL
; mp
= nmp
)
8900 if (mp
->refcount
> 0)
8905 ";; Offset %u, min %ld, max %ld ",
8906 (unsigned) mp
->offset
, (unsigned long) mp
->min_address
,
8907 (unsigned long) mp
->max_address
);
8908 arm_print_value (dump_file
, mp
->value
);
8909 fputc ('\n', dump_file
);
8912 switch (mp
->fix_size
)
8914 #ifdef HAVE_consttable_1
8916 scan
= emit_insn_after (gen_consttable_1 (mp
->value
), scan
);
8920 #ifdef HAVE_consttable_2
8922 scan
= emit_insn_after (gen_consttable_2 (mp
->value
), scan
);
8926 #ifdef HAVE_consttable_4
8928 scan
= emit_insn_after (gen_consttable_4 (mp
->value
), scan
);
8932 #ifdef HAVE_consttable_8
8934 scan
= emit_insn_after (gen_consttable_8 (mp
->value
), scan
);
8938 #ifdef HAVE_consttable_16
8940 scan
= emit_insn_after (gen_consttable_16 (mp
->value
), scan
);
8953 minipool_vector_head
= minipool_vector_tail
= NULL
;
8954 scan
= emit_insn_after (gen_consttable_end (), scan
);
8955 scan
= emit_barrier_after (scan
);
8958 /* Return the cost of forcibly inserting a barrier after INSN. */
8960 arm_barrier_cost (rtx insn
)
8962 /* Basing the location of the pool on the loop depth is preferable,
8963 but at the moment, the basic block information seems to be
8964 corrupt by this stage of the compilation. */
8966 rtx next
= next_nonnote_insn (insn
);
8968 if (next
!= NULL
&& GET_CODE (next
) == CODE_LABEL
)
8971 switch (GET_CODE (insn
))
8974 /* It will always be better to place the table before the label, rather
8983 return base_cost
- 10;
8986 return base_cost
+ 10;
8990 /* Find the best place in the insn stream in the range
8991 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8992 Create the barrier by inserting a jump and add a new fix entry for
8995 create_fix_barrier (Mfix
*fix
, HOST_WIDE_INT max_address
)
8997 HOST_WIDE_INT count
= 0;
8999 rtx from
= fix
->insn
;
9000 /* The instruction after which we will insert the jump. */
9001 rtx selected
= NULL
;
9003 /* The address at which the jump instruction will be placed. */
9004 HOST_WIDE_INT selected_address
;
9006 HOST_WIDE_INT max_count
= max_address
- fix
->address
;
9007 rtx label
= gen_label_rtx ();
9009 selected_cost
= arm_barrier_cost (from
);
9010 selected_address
= fix
->address
;
9012 while (from
&& count
< max_count
)
9017 /* This code shouldn't have been called if there was a natural barrier
9019 gcc_assert (GET_CODE (from
) != BARRIER
);
9021 /* Count the length of this insn. */
9022 count
+= get_attr_length (from
);
9024 /* If there is a jump table, add its length. */
9025 tmp
= is_jump_table (from
);
9028 count
+= get_jump_table_size (tmp
);
9030 /* Jump tables aren't in a basic block, so base the cost on
9031 the dispatch insn. If we select this location, we will
9032 still put the pool after the table. */
9033 new_cost
= arm_barrier_cost (from
);
9035 if (count
< max_count
9036 && (!selected
|| new_cost
<= selected_cost
))
9039 selected_cost
= new_cost
;
9040 selected_address
= fix
->address
+ count
;
9043 /* Continue after the dispatch table. */
9044 from
= NEXT_INSN (tmp
);
9048 new_cost
= arm_barrier_cost (from
);
9050 if (count
< max_count
9051 && (!selected
|| new_cost
<= selected_cost
))
9054 selected_cost
= new_cost
;
9055 selected_address
= fix
->address
+ count
;
9058 from
= NEXT_INSN (from
);
9061 /* Make sure that we found a place to insert the jump. */
9062 gcc_assert (selected
);
9064 /* Create a new JUMP_INSN that branches around a barrier. */
9065 from
= emit_jump_insn_after (gen_jump (label
), selected
);
9066 JUMP_LABEL (from
) = label
;
9067 barrier
= emit_barrier_after (from
);
9068 emit_label_after (label
, barrier
);
9070 /* Create a minipool barrier entry for the new barrier. */
9071 new_fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* new_fix
));
9072 new_fix
->insn
= barrier
;
9073 new_fix
->address
= selected_address
;
9074 new_fix
->next
= fix
->next
;
9075 fix
->next
= new_fix
;
9080 /* Record that there is a natural barrier in the insn stream at
9083 push_minipool_barrier (rtx insn
, HOST_WIDE_INT address
)
9085 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
9088 fix
->address
= address
;
9091 if (minipool_fix_head
!= NULL
)
9092 minipool_fix_tail
->next
= fix
;
9094 minipool_fix_head
= fix
;
9096 minipool_fix_tail
= fix
;
9099 /* Record INSN, which will need fixing up to load a value from the
9100 minipool. ADDRESS is the offset of the insn since the start of the
9101 function; LOC is a pointer to the part of the insn which requires
9102 fixing; VALUE is the constant that must be loaded, which is of type
9105 push_minipool_fix (rtx insn
, HOST_WIDE_INT address
, rtx
*loc
,
9106 enum machine_mode mode
, rtx value
)
9108 Mfix
* fix
= (Mfix
*) obstack_alloc (&minipool_obstack
, sizeof (* fix
));
9111 fix
->address
= address
;
9114 fix
->fix_size
= MINIPOOL_FIX_SIZE (mode
);
9116 fix
->forwards
= get_attr_pool_range (insn
);
9117 fix
->backwards
= get_attr_neg_pool_range (insn
);
9118 fix
->minipool
= NULL
;
9120 /* If an insn doesn't have a range defined for it, then it isn't
9121 expecting to be reworked by this code. Better to stop now than
9122 to generate duff assembly code. */
9123 gcc_assert (fix
->forwards
|| fix
->backwards
);
9125 /* If an entry requires 8-byte alignment then assume all constant pools
9126 require 4 bytes of padding. Trying to do this later on a per-pool
9127 basis is awkward because existing pool entries have to be modified. */
9128 if (ARM_DOUBLEWORD_ALIGN
&& fix
->fix_size
>= 8)
9134 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9135 GET_MODE_NAME (mode
),
9136 INSN_UID (insn
), (unsigned long) address
,
9137 -1 * (long)fix
->backwards
, (long)fix
->forwards
);
9138 arm_print_value (dump_file
, fix
->value
);
9139 fprintf (dump_file
, "\n");
9142 /* Add it to the chain of fixes. */
9145 if (minipool_fix_head
!= NULL
)
9146 minipool_fix_tail
->next
= fix
;
9148 minipool_fix_head
= fix
;
9150 minipool_fix_tail
= fix
;
9153 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9154 Returns the number of insns needed, or 99 if we don't know how to
9157 arm_const_double_inline_cost (rtx val
)
9159 rtx lowpart
, highpart
;
9160 enum machine_mode mode
;
9162 mode
= GET_MODE (val
);
9164 if (mode
== VOIDmode
)
9167 gcc_assert (GET_MODE_SIZE (mode
) == 8);
9169 lowpart
= gen_lowpart (SImode
, val
);
9170 highpart
= gen_highpart_mode (SImode
, mode
, val
);
9172 gcc_assert (GET_CODE (lowpart
) == CONST_INT
);
9173 gcc_assert (GET_CODE (highpart
) == CONST_INT
);
9175 return (arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (lowpart
),
9176 NULL_RTX
, NULL_RTX
, 0, 0)
9177 + arm_gen_constant (SET
, SImode
, NULL_RTX
, INTVAL (highpart
),
9178 NULL_RTX
, NULL_RTX
, 0, 0));
9181 /* Return true if it is worthwhile to split a 64-bit constant into two
9182 32-bit operations. This is the case if optimizing for size, or
9183 if we have load delay slots, or if one 32-bit part can be done with
9184 a single data operation. */
9186 arm_const_double_by_parts (rtx val
)
9188 enum machine_mode mode
= GET_MODE (val
);
9191 if (optimize_size
|| arm_ld_sched
)
9194 if (mode
== VOIDmode
)
9197 part
= gen_highpart_mode (SImode
, mode
, val
);
9199 gcc_assert (GET_CODE (part
) == CONST_INT
);
9201 if (const_ok_for_arm (INTVAL (part
))
9202 || const_ok_for_arm (~INTVAL (part
)))
9205 part
= gen_lowpart (SImode
, val
);
9207 gcc_assert (GET_CODE (part
) == CONST_INT
);
9209 if (const_ok_for_arm (INTVAL (part
))
9210 || const_ok_for_arm (~INTVAL (part
)))
9216 /* Scan INSN and note any of its operands that need fixing.
9217 If DO_PUSHES is false we do not actually push any of the fixups
9218 needed. The function returns TRUE if any fixups were needed/pushed.
9219 This is used by arm_memory_load_p() which needs to know about loads
9220 of constants that will be converted into minipool loads. */
9222 note_invalid_constants (rtx insn
, HOST_WIDE_INT address
, int do_pushes
)
9224 bool result
= false;
9227 extract_insn (insn
);
9229 if (!constrain_operands (1))
9230 fatal_insn_not_found (insn
);
9232 if (recog_data
.n_alternatives
== 0)
9235 /* Fill in recog_op_alt with information about the constraints of
9237 preprocess_constraints ();
9239 for (opno
= 0; opno
< recog_data
.n_operands
; opno
++)
9241 /* Things we need to fix can only occur in inputs. */
9242 if (recog_data
.operand_type
[opno
] != OP_IN
)
9245 /* If this alternative is a memory reference, then any mention
9246 of constants in this alternative is really to fool reload
9247 into allowing us to accept one there. We need to fix them up
9248 now so that we output the right code. */
9249 if (recog_op_alt
[opno
][which_alternative
].memory_ok
)
9251 rtx op
= recog_data
.operand
[opno
];
9253 if (CONSTANT_P (op
))
9256 push_minipool_fix (insn
, address
, recog_data
.operand_loc
[opno
],
9257 recog_data
.operand_mode
[opno
], op
);
9260 else if (GET_CODE (op
) == MEM
9261 && GET_CODE (XEXP (op
, 0)) == SYMBOL_REF
9262 && CONSTANT_POOL_ADDRESS_P (XEXP (op
, 0)))
9266 rtx cop
= avoid_constant_pool_reference (op
);
9268 /* Casting the address of something to a mode narrower
9269 than a word can cause avoid_constant_pool_reference()
9270 to return the pool reference itself. That's no good to
9271 us here. Lets just hope that we can use the
9272 constant pool value directly. */
9274 cop
= get_pool_constant (XEXP (op
, 0));
9276 push_minipool_fix (insn
, address
,
9277 recog_data
.operand_loc
[opno
],
9278 recog_data
.operand_mode
[opno
], cop
);
9289 /* Gcc puts the pool in the wrong place for ARM, since we can only
9290 load addresses a limited distance around the pc. We do some
9291 special munging to move the constant pool values to the correct
9292 point in the code. */
9297 HOST_WIDE_INT address
= 0;
9300 minipool_fix_head
= minipool_fix_tail
= NULL
;
9302 /* The first insn must always be a note, or the code below won't
9303 scan it properly. */
9304 insn
= get_insns ();
9305 gcc_assert (GET_CODE (insn
) == NOTE
);
9308 /* Scan all the insns and record the operands that will need fixing. */
9309 for (insn
= next_nonnote_insn (insn
); insn
; insn
= next_nonnote_insn (insn
))
9311 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9312 && (arm_cirrus_insn_p (insn
)
9313 || GET_CODE (insn
) == JUMP_INSN
9314 || arm_memory_load_p (insn
)))
9315 cirrus_reorg (insn
);
9317 if (GET_CODE (insn
) == BARRIER
)
9318 push_minipool_barrier (insn
, address
);
9319 else if (INSN_P (insn
))
9323 note_invalid_constants (insn
, address
, true);
9324 address
+= get_attr_length (insn
);
9326 /* If the insn is a vector jump, add the size of the table
9327 and skip the table. */
9328 if ((table
= is_jump_table (insn
)) != NULL
)
9330 address
+= get_jump_table_size (table
);
9336 fix
= minipool_fix_head
;
9338 /* Now scan the fixups and perform the required changes. */
9343 Mfix
* last_added_fix
;
9344 Mfix
* last_barrier
= NULL
;
9347 /* Skip any further barriers before the next fix. */
9348 while (fix
&& GET_CODE (fix
->insn
) == BARRIER
)
9351 /* No more fixes. */
9355 last_added_fix
= NULL
;
9357 for (ftmp
= fix
; ftmp
; ftmp
= ftmp
->next
)
9359 if (GET_CODE (ftmp
->insn
) == BARRIER
)
9361 if (ftmp
->address
>= minipool_vector_head
->max_address
)
9364 last_barrier
= ftmp
;
9366 else if ((ftmp
->minipool
= add_minipool_forward_ref (ftmp
)) == NULL
)
9369 last_added_fix
= ftmp
; /* Keep track of the last fix added. */
9372 /* If we found a barrier, drop back to that; any fixes that we
9373 could have reached but come after the barrier will now go in
9374 the next mini-pool. */
9375 if (last_barrier
!= NULL
)
9377 /* Reduce the refcount for those fixes that won't go into this
9379 for (fdel
= last_barrier
->next
;
9380 fdel
&& fdel
!= ftmp
;
9383 fdel
->minipool
->refcount
--;
9384 fdel
->minipool
= NULL
;
9387 ftmp
= last_barrier
;
9391 /* ftmp is first fix that we can't fit into this pool and
9392 there no natural barriers that we could use. Insert a
9393 new barrier in the code somewhere between the previous
9394 fix and this one, and arrange to jump around it. */
9395 HOST_WIDE_INT max_address
;
9397 /* The last item on the list of fixes must be a barrier, so
9398 we can never run off the end of the list of fixes without
9399 last_barrier being set. */
9402 max_address
= minipool_vector_head
->max_address
;
9403 /* Check that there isn't another fix that is in range that
9404 we couldn't fit into this pool because the pool was
9405 already too large: we need to put the pool before such an
9406 instruction. The pool itself may come just after the
9407 fix because create_fix_barrier also allows space for a
9408 jump instruction. */
9409 if (ftmp
->address
< max_address
)
9410 max_address
= ftmp
->address
+ 1;
9412 last_barrier
= create_fix_barrier (last_added_fix
, max_address
);
9415 assign_minipool_offsets (last_barrier
);
9419 if (GET_CODE (ftmp
->insn
) != BARRIER
9420 && ((ftmp
->minipool
= add_minipool_backward_ref (ftmp
))
9427 /* Scan over the fixes we have identified for this pool, fixing them
9428 up and adding the constants to the pool itself. */
9429 for (this_fix
= fix
; this_fix
&& ftmp
!= this_fix
;
9430 this_fix
= this_fix
->next
)
9431 if (GET_CODE (this_fix
->insn
) != BARRIER
)
9434 = plus_constant (gen_rtx_LABEL_REF (VOIDmode
,
9435 minipool_vector_label
),
9436 this_fix
->minipool
->offset
);
9437 *this_fix
->loc
= gen_rtx_MEM (this_fix
->mode
, addr
);
9440 dump_minipool (last_barrier
->insn
);
9444 /* From now on we must synthesize any constants that we can't handle
9445 directly. This can happen if the RTL gets split during final
9446 instruction generation. */
9447 after_arm_reorg
= 1;
9449 /* Free the minipool memory. */
9450 obstack_free (&minipool_obstack
, minipool_startobj
);
9453 /* Routines to output assembly language. */
9455 /* If the rtx is the correct value then return the string of the number.
9456 In this way we can ensure that valid double constants are generated even
9457 when cross compiling. */
9459 fp_immediate_constant (rtx x
)
9464 if (!fp_consts_inited
)
9467 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9468 for (i
= 0; i
< 8; i
++)
9469 if (REAL_VALUES_EQUAL (r
, values_fp
[i
]))
9470 return strings_fp
[i
];
9475 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9477 fp_const_from_val (REAL_VALUE_TYPE
*r
)
9481 if (!fp_consts_inited
)
9484 for (i
= 0; i
< 8; i
++)
9485 if (REAL_VALUES_EQUAL (*r
, values_fp
[i
]))
9486 return strings_fp
[i
];
9491 /* Output the operands of a LDM/STM instruction to STREAM.
9492 MASK is the ARM register set mask of which only bits 0-15 are important.
9493 REG is the base register, either the frame pointer or the stack pointer,
9494 INSTR is the possibly suffixed load or store instruction.
9495 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9498 print_multi_reg (FILE *stream
, const char *instr
, unsigned reg
,
9499 unsigned long mask
, int rfe
)
9502 bool not_first
= FALSE
;
9504 gcc_assert (!rfe
|| (mask
& (1 << PC_REGNUM
)));
9505 fputc ('\t', stream
);
9506 asm_fprintf (stream
, instr
, reg
);
9507 fputc ('{', stream
);
9509 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
9510 if (mask
& (1 << i
))
9513 fprintf (stream
, ", ");
9515 asm_fprintf (stream
, "%r", i
);
9520 fprintf (stream
, "}^\n");
9522 fprintf (stream
, "}\n");
9526 /* Output a FLDMD instruction to STREAM.
9527 BASE if the register containing the address.
9528 REG and COUNT specify the register range.
9529 Extra registers may be added to avoid hardware bugs.
9531 We output FLDMD even for ARMv5 VFP implementations. Although
9532 FLDMD is technically not supported until ARMv6, it is believed
9533 that all VFP implementations support its use in this context. */
9536 vfp_output_fldmd (FILE * stream
, unsigned int base
, int reg
, int count
)
9540 /* Workaround ARM10 VFPr1 bug. */
9541 if (count
== 2 && !arm_arch6
)
9548 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9549 load into multiple parts if we have to handle more than 16 registers. */
9552 vfp_output_fldmd (stream
, base
, reg
, 16);
9553 vfp_output_fldmd (stream
, base
, reg
+ 16, count
- 16);
9557 fputc ('\t', stream
);
9558 asm_fprintf (stream
, "fldmfdd\t%r!, {", base
);
9560 for (i
= reg
; i
< reg
+ count
; i
++)
9563 fputs (", ", stream
);
9564 asm_fprintf (stream
, "d%d", i
);
9566 fputs ("}\n", stream
);
9571 /* Output the assembly for a store multiple. */
9574 vfp_output_fstmd (rtx
* operands
)
9581 strcpy (pattern
, "fstmfdd\t%m0!, {%P1");
9582 p
= strlen (pattern
);
9584 gcc_assert (GET_CODE (operands
[1]) == REG
);
9586 base
= (REGNO (operands
[1]) - FIRST_VFP_REGNUM
) / 2;
9587 for (i
= 1; i
< XVECLEN (operands
[2], 0); i
++)
9589 p
+= sprintf (&pattern
[p
], ", d%d", base
+ i
);
9591 strcpy (&pattern
[p
], "}");
9593 output_asm_insn (pattern
, operands
);
9598 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9599 number of bytes pushed. */
9602 vfp_emit_fstmd (int base_reg
, int count
)
9609 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9610 register pairs are stored by a store multiple insn. We avoid this
9611 by pushing an extra pair. */
9612 if (count
== 2 && !arm_arch6
)
9614 if (base_reg
== LAST_VFP_REGNUM
- 3)
9619 /* FSTMD may not store more than 16 doubleword registers at once. Split
9620 larger stores into multiple parts (up to a maximum of two, in
9625 /* NOTE: base_reg is an internal register number, so each D register
9627 saved
= vfp_emit_fstmd (base_reg
+ 32, count
- 16);
9628 saved
+= vfp_emit_fstmd (base_reg
, 16);
9632 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
9633 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
9635 reg
= gen_rtx_REG (DFmode
, base_reg
);
9639 = gen_rtx_SET (VOIDmode
,
9640 gen_frame_mem (BLKmode
,
9641 gen_rtx_PRE_DEC (BLKmode
,
9642 stack_pointer_rtx
)),
9643 gen_rtx_UNSPEC (BLKmode
,
9647 tmp
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
9648 plus_constant (stack_pointer_rtx
, -(count
* 8)));
9649 RTX_FRAME_RELATED_P (tmp
) = 1;
9650 XVECEXP (dwarf
, 0, 0) = tmp
;
9652 tmp
= gen_rtx_SET (VOIDmode
,
9653 gen_frame_mem (DFmode
, stack_pointer_rtx
),
9655 RTX_FRAME_RELATED_P (tmp
) = 1;
9656 XVECEXP (dwarf
, 0, 1) = tmp
;
9658 for (i
= 1; i
< count
; i
++)
9660 reg
= gen_rtx_REG (DFmode
, base_reg
);
9662 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
9664 tmp
= gen_rtx_SET (VOIDmode
,
9665 gen_frame_mem (DFmode
,
9666 plus_constant (stack_pointer_rtx
,
9669 RTX_FRAME_RELATED_P (tmp
) = 1;
9670 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
9673 par
= emit_insn (par
);
9674 REG_NOTES (par
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, dwarf
,
9676 RTX_FRAME_RELATED_P (par
) = 1;
9681 /* Emit a call instruction with pattern PAT. ADDR is the address of
9685 arm_emit_call_insn (rtx pat
, rtx addr
)
9689 insn
= emit_call_insn (pat
);
9691 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9692 If the call might use such an entry, add a use of the PIC register
9693 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9694 if (TARGET_VXWORKS_RTP
9696 && GET_CODE (addr
) == SYMBOL_REF
9697 && (SYMBOL_REF_DECL (addr
)
9698 ? !targetm
.binds_local_p (SYMBOL_REF_DECL (addr
))
9699 : !SYMBOL_REF_LOCAL_P (addr
)))
9701 require_pic_register ();
9702 use_reg (&CALL_INSN_FUNCTION_USAGE (insn
), cfun
->machine
->pic_reg
);
9706 /* Output a 'call' insn. */
9708 output_call (rtx
*operands
)
9710 gcc_assert (!arm_arch5
); /* Patterns should call blx <reg> directly. */
9712 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9713 if (REGNO (operands
[0]) == LR_REGNUM
)
9715 operands
[0] = gen_rtx_REG (SImode
, IP_REGNUM
);
9716 output_asm_insn ("mov%?\t%0, %|lr", operands
);
9719 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
9721 if (TARGET_INTERWORK
|| arm_arch4t
)
9722 output_asm_insn ("bx%?\t%0", operands
);
9724 output_asm_insn ("mov%?\t%|pc, %0", operands
);
9729 /* Output a 'call' insn that is a reference in memory. */
9731 output_call_mem (rtx
*operands
)
9733 if (TARGET_INTERWORK
&& !arm_arch5
)
9735 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
9736 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
9737 output_asm_insn ("bx%?\t%|ip", operands
);
9739 else if (regno_use_in (LR_REGNUM
, operands
[0]))
9741 /* LR is used in the memory address. We load the address in the
9742 first instruction. It's safe to use IP as the target of the
9743 load since the call will kill it anyway. */
9744 output_asm_insn ("ldr%?\t%|ip, %0", operands
);
9746 output_asm_insn ("blx%?\t%|ip", operands
);
9749 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
9751 output_asm_insn ("bx%?\t%|ip", operands
);
9753 output_asm_insn ("mov%?\t%|pc, %|ip", operands
);
9758 output_asm_insn ("mov%?\t%|lr, %|pc", operands
);
9759 output_asm_insn ("ldr%?\t%|pc, %0", operands
);
9766 /* Output a move from arm registers to an fpa registers.
9767 OPERANDS[0] is an fpa register.
9768 OPERANDS[1] is the first registers of an arm register pair. */
9770 output_mov_long_double_fpa_from_arm (rtx
*operands
)
9772 int arm_reg0
= REGNO (operands
[1]);
9775 gcc_assert (arm_reg0
!= IP_REGNUM
);
9777 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
9778 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
9779 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
9781 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
9782 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands
);
9787 /* Output a move from an fpa register to arm registers.
9788 OPERANDS[0] is the first registers of an arm register pair.
9789 OPERANDS[1] is an fpa register. */
9791 output_mov_long_double_arm_from_fpa (rtx
*operands
)
9793 int arm_reg0
= REGNO (operands
[0]);
9796 gcc_assert (arm_reg0
!= IP_REGNUM
);
9798 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
9799 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
9800 ops
[2] = gen_rtx_REG (SImode
, 2 + arm_reg0
);
9802 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands
);
9803 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops
);
9807 /* Output a move from arm registers to arm registers of a long double
9808 OPERANDS[0] is the destination.
9809 OPERANDS[1] is the source. */
9811 output_mov_long_double_arm_from_arm (rtx
*operands
)
9813 /* We have to be careful here because the two might overlap. */
9814 int dest_start
= REGNO (operands
[0]);
9815 int src_start
= REGNO (operands
[1]);
9819 if (dest_start
< src_start
)
9821 for (i
= 0; i
< 3; i
++)
9823 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
9824 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
9825 output_asm_insn ("mov%?\t%0, %1", ops
);
9830 for (i
= 2; i
>= 0; i
--)
9832 ops
[0] = gen_rtx_REG (SImode
, dest_start
+ i
);
9833 ops
[1] = gen_rtx_REG (SImode
, src_start
+ i
);
9834 output_asm_insn ("mov%?\t%0, %1", ops
);
9842 /* Output a move from arm registers to an fpa registers.
9843 OPERANDS[0] is an fpa register.
9844 OPERANDS[1] is the first registers of an arm register pair. */
9846 output_mov_double_fpa_from_arm (rtx
*operands
)
9848 int arm_reg0
= REGNO (operands
[1]);
9851 gcc_assert (arm_reg0
!= IP_REGNUM
);
9853 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
9854 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
9855 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops
);
9856 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands
);
9860 /* Output a move from an fpa register to arm registers.
9861 OPERANDS[0] is the first registers of an arm register pair.
9862 OPERANDS[1] is an fpa register. */
9864 output_mov_double_arm_from_fpa (rtx
*operands
)
9866 int arm_reg0
= REGNO (operands
[0]);
9869 gcc_assert (arm_reg0
!= IP_REGNUM
);
9871 ops
[0] = gen_rtx_REG (SImode
, arm_reg0
);
9872 ops
[1] = gen_rtx_REG (SImode
, 1 + arm_reg0
);
9873 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands
);
9874 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops
);
9878 /* Output a move between double words.
9879 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9880 or MEM<-REG and all MEMs must be offsettable addresses. */
9882 output_move_double (rtx
*operands
)
9884 enum rtx_code code0
= GET_CODE (operands
[0]);
9885 enum rtx_code code1
= GET_CODE (operands
[1]);
9890 int reg0
= REGNO (operands
[0]);
9892 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
9894 gcc_assert (code1
== MEM
); /* Constraints should ensure this. */
9896 switch (GET_CODE (XEXP (operands
[1], 0)))
9900 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands
);
9902 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
9906 gcc_assert (TARGET_LDRD
);
9907 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands
);
9912 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands
);
9914 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands
);
9919 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands
);
9921 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands
);
9925 gcc_assert (TARGET_LDRD
);
9926 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands
);
9931 otherops
[0] = operands
[0];
9932 otherops
[1] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 0);
9933 otherops
[2] = XEXP (XEXP (XEXP (operands
[1], 0), 1), 1);
9935 if (GET_CODE (XEXP (operands
[1], 0)) == PRE_MODIFY
)
9937 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
9939 /* Registers overlap so split out the increment. */
9940 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
9941 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops
);
9945 /* IWMMXT allows offsets larger than ldrd can handle,
9946 fix these up with a pair of ldr. */
9947 if (GET_CODE (otherops
[2]) == CONST_INT
9948 && (INTVAL(otherops
[2]) <= -256
9949 || INTVAL(otherops
[2]) >= 256))
9951 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
9952 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
9953 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops
);
9956 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops
);
9961 /* IWMMXT allows offsets larger than ldrd can handle,
9962 fix these up with a pair of ldr. */
9963 if (GET_CODE (otherops
[2]) == CONST_INT
9964 && (INTVAL(otherops
[2]) <= -256
9965 || INTVAL(otherops
[2]) >= 256))
9967 otherops
[0] = gen_rtx_REG (SImode
, 1 + reg0
);
9968 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops
);
9969 otherops
[0] = operands
[0];
9970 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
9973 /* We only allow constant increments, so this is safe. */
9974 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops
);
9980 /* We might be able to use ldrd %0, %1 here. However the range is
9981 different to ldr/adr, and it is broken on some ARMv7-M
9983 output_asm_insn ("adr%?\t%0, %1", operands
);
9985 output_asm_insn ("ldr%(d%)\t%0, [%0]", operands
);
9987 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
9990 /* ??? This needs checking for thumb2. */
9992 if (arm_add_operand (XEXP (XEXP (operands
[1], 0), 1),
9993 GET_MODE (XEXP (XEXP (operands
[1], 0), 1))))
9995 otherops
[0] = operands
[0];
9996 otherops
[1] = XEXP (XEXP (operands
[1], 0), 0);
9997 otherops
[2] = XEXP (XEXP (operands
[1], 0), 1);
9999 if (GET_CODE (XEXP (operands
[1], 0)) == PLUS
)
10001 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
10003 switch ((int) INTVAL (otherops
[2]))
10006 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops
);
10011 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops
);
10016 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops
);
10021 && (GET_CODE (otherops
[2]) == REG
10022 || (GET_CODE (otherops
[2]) == CONST_INT
10023 && INTVAL (otherops
[2]) > -256
10024 && INTVAL (otherops
[2]) < 256)))
10026 if (reg_overlap_mentioned_p (otherops
[0],
10029 /* Swap base and index registers over to
10030 avoid a conflict. */
10031 otherops
[1] = XEXP (XEXP (operands
[1], 0), 1);
10032 otherops
[2] = XEXP (XEXP (operands
[1], 0), 0);
10034 /* If both registers conflict, it will usually
10035 have been fixed by a splitter. */
10036 if (reg_overlap_mentioned_p (otherops
[0], otherops
[2]))
10038 output_asm_insn ("add%?\t%1, %1, %2", otherops
);
10039 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10043 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops
);
10047 if (GET_CODE (otherops
[2]) == CONST_INT
)
10049 if (!(const_ok_for_arm (INTVAL (otherops
[2]))))
10050 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops
);
10052 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
10055 output_asm_insn ("add%?\t%0, %1, %2", otherops
);
10058 output_asm_insn ("sub%?\t%0, %1, %2", otherops
);
10061 return "ldr%(d%)\t%0, [%0]";
10063 return "ldm%(ia%)\t%0, %M0";
10067 otherops
[1] = adjust_address (operands
[1], SImode
, 4);
10068 /* Take care of overlapping base/data reg. */
10069 if (reg_mentioned_p (operands
[0], operands
[1]))
10071 output_asm_insn ("ldr%?\t%0, %1", otherops
);
10072 output_asm_insn ("ldr%?\t%0, %1", operands
);
10076 output_asm_insn ("ldr%?\t%0, %1", operands
);
10077 output_asm_insn ("ldr%?\t%0, %1", otherops
);
10084 /* Constraints should ensure this. */
10085 gcc_assert (code0
== MEM
&& code1
== REG
);
10086 gcc_assert (REGNO (operands
[1]) != IP_REGNUM
);
10088 switch (GET_CODE (XEXP (operands
[0], 0)))
10092 output_asm_insn ("str%(d%)\t%1, [%m0]", operands
);
10094 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
10098 gcc_assert (TARGET_LDRD
);
10099 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands
);
10104 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands
);
10106 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands
);
10111 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands
);
10113 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands
);
10117 gcc_assert (TARGET_LDRD
);
10118 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands
);
10123 otherops
[0] = operands
[1];
10124 otherops
[1] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 0);
10125 otherops
[2] = XEXP (XEXP (XEXP (operands
[0], 0), 1), 1);
10127 /* IWMMXT allows offsets larger than ldrd can handle,
10128 fix these up with a pair of ldr. */
10129 if (GET_CODE (otherops
[2]) == CONST_INT
10130 && (INTVAL(otherops
[2]) <= -256
10131 || INTVAL(otherops
[2]) >= 256))
10134 reg1
= gen_rtx_REG (SImode
, 1 + REGNO (operands
[1]));
10135 if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
10137 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops
);
10138 otherops
[0] = reg1
;
10139 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops
);
10143 otherops
[0] = reg1
;
10144 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops
);
10145 otherops
[0] = operands
[1];
10146 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops
);
10149 else if (GET_CODE (XEXP (operands
[0], 0)) == PRE_MODIFY
)
10150 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops
);
10152 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops
);
10156 otherops
[2] = XEXP (XEXP (operands
[0], 0), 1);
10157 if (GET_CODE (otherops
[2]) == CONST_INT
&& !TARGET_LDRD
)
10159 switch ((int) INTVAL (XEXP (XEXP (operands
[0], 0), 1)))
10162 output_asm_insn ("stm%(db%)\t%m0, %M1", operands
);
10168 output_asm_insn ("stm%(da%)\t%m0, %M1", operands
);
10174 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands
);
10179 && (GET_CODE (otherops
[2]) == REG
10180 || (GET_CODE (otherops
[2]) == CONST_INT
10181 && INTVAL (otherops
[2]) > -256
10182 && INTVAL (otherops
[2]) < 256)))
10184 otherops
[0] = operands
[1];
10185 otherops
[1] = XEXP (XEXP (operands
[0], 0), 0);
10186 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops
);
10192 otherops
[0] = adjust_address (operands
[0], SImode
, 4);
10193 otherops
[1] = gen_rtx_REG (SImode
, 1 + REGNO (operands
[1]));
10194 output_asm_insn ("str%?\t%1, %0", operands
);
10195 output_asm_insn ("str%?\t%1, %0", otherops
);
10202 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10203 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10206 output_move_quad (rtx
*operands
)
10208 if (REG_P (operands
[0]))
10210 /* Load, or reg->reg move. */
10212 if (MEM_P (operands
[1]))
10214 switch (GET_CODE (XEXP (operands
[1], 0)))
10217 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands
);
10222 output_asm_insn ("adr%?\t%0, %1", operands
);
10223 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands
);
10227 gcc_unreachable ();
10235 gcc_assert (REG_P (operands
[1]));
10237 dest
= REGNO (operands
[0]);
10238 src
= REGNO (operands
[1]);
10240 /* This seems pretty dumb, but hopefully GCC won't try to do it
10243 for (i
= 0; i
< 4; i
++)
10245 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
10246 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
10247 output_asm_insn ("mov%?\t%0, %1", ops
);
10250 for (i
= 3; i
>= 0; i
--)
10252 ops
[0] = gen_rtx_REG (SImode
, dest
+ i
);
10253 ops
[1] = gen_rtx_REG (SImode
, src
+ i
);
10254 output_asm_insn ("mov%?\t%0, %1", ops
);
10260 gcc_assert (MEM_P (operands
[0]));
10261 gcc_assert (REG_P (operands
[1]));
10262 gcc_assert (!reg_overlap_mentioned_p (operands
[1], operands
[0]));
10264 switch (GET_CODE (XEXP (operands
[0], 0)))
10267 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands
);
10271 gcc_unreachable ();
10278 /* Output a VFP load or store instruction. */
10281 output_move_vfp (rtx
*operands
)
10283 rtx reg
, mem
, addr
, ops
[2];
10284 int load
= REG_P (operands
[0]);
10285 int dp
= GET_MODE_SIZE (GET_MODE (operands
[0])) == 8;
10286 int integer_p
= GET_MODE_CLASS (GET_MODE (operands
[0])) == MODE_INT
;
10287 const char *template;
10289 enum machine_mode mode
;
10291 reg
= operands
[!load
];
10292 mem
= operands
[load
];
10294 mode
= GET_MODE (reg
);
10296 gcc_assert (REG_P (reg
));
10297 gcc_assert (IS_VFP_REGNUM (REGNO (reg
)));
10298 gcc_assert (mode
== SFmode
10302 || (TARGET_NEON
&& VALID_NEON_DREG_MODE (mode
)));
10303 gcc_assert (MEM_P (mem
));
10305 addr
= XEXP (mem
, 0);
10307 switch (GET_CODE (addr
))
10310 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10311 ops
[0] = XEXP (addr
, 0);
10316 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10317 ops
[0] = XEXP (addr
, 0);
10322 template = "f%s%c%%?\t%%%s0, %%1%s";
10328 sprintf (buff
, template,
10329 load
? "ld" : "st",
10332 integer_p
? "\t%@ int" : "");
10333 output_asm_insn (buff
, ops
);
10338 /* Output a Neon quad-word load or store, or a load or store for
10339 larger structure modes. We could also support post-modify forms using
10340 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10342 WARNING: The ordering of elements in memory is weird in big-endian mode,
10343 because we use VSTM instead of VST1, to make it easy to make vector stores
10344 via ARM registers write values in the same order as stores direct from Neon
10345 registers. For example, the byte ordering of a quadword vector with 16-byte
10346 elements like this:
10348 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10350 will be (with lowest address first, h = most-significant byte,
10351 l = least-significant byte of element):
10353 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10354 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10356 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10359 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10361 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10362 layout will result as if VSTM/VLDM were used. */
10365 output_move_neon (rtx
*operands
)
10367 rtx reg
, mem
, addr
, ops
[2];
10368 int regno
, load
= REG_P (operands
[0]);
10369 const char *template;
10371 enum machine_mode mode
;
10373 reg
= operands
[!load
];
10374 mem
= operands
[load
];
10376 mode
= GET_MODE (reg
);
10378 gcc_assert (REG_P (reg
));
10379 regno
= REGNO (reg
);
10380 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno
)
10381 || NEON_REGNO_OK_FOR_QUAD (regno
));
10382 gcc_assert (VALID_NEON_DREG_MODE (mode
)
10383 || VALID_NEON_QREG_MODE (mode
)
10384 || VALID_NEON_STRUCT_MODE (mode
));
10385 gcc_assert (MEM_P (mem
));
10387 addr
= XEXP (mem
, 0);
10389 /* Strip off const from addresses like (const (plus (...))). */
10390 if (GET_CODE (addr
) == CONST
&& GET_CODE (XEXP (addr
, 0)) == PLUS
)
10391 addr
= XEXP (addr
, 0);
10393 switch (GET_CODE (addr
))
10396 template = "v%smia%%?\t%%0!, %%h1";
10397 ops
[0] = XEXP (addr
, 0);
10402 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10403 gcc_unreachable ();
10408 int nregs
= HARD_REGNO_NREGS (REGNO (reg
), mode
) / 2;
10411 for (i
= 0; i
< nregs
; i
++)
10413 /* We're only using DImode here because it's a convenient size. */
10414 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * i
);
10415 ops
[1] = adjust_address (mem
, SImode
, 8 * i
);
10416 if (reg_overlap_mentioned_p (ops
[0], mem
))
10418 gcc_assert (overlap
== -1);
10423 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
10424 output_asm_insn (buff
, ops
);
10429 ops
[0] = gen_rtx_REG (DImode
, REGNO (reg
) + 2 * overlap
);
10430 ops
[1] = adjust_address (mem
, SImode
, 8 * overlap
);
10431 sprintf (buff
, "v%sr%%?\t%%P0, %%1", load
? "ld" : "st");
10432 output_asm_insn (buff
, ops
);
10439 template = "v%smia%%?\t%%m0, %%h1";
10444 sprintf (buff
, template, load
? "ld" : "st");
10445 output_asm_insn (buff
, ops
);
10450 /* Output an ADD r, s, #n where n may be too big for one instruction.
10451 If adding zero to one register, output nothing. */
10453 output_add_immediate (rtx
*operands
)
10455 HOST_WIDE_INT n
= INTVAL (operands
[2]);
10457 if (n
!= 0 || REGNO (operands
[0]) != REGNO (operands
[1]))
10460 output_multi_immediate (operands
,
10461 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10464 output_multi_immediate (operands
,
10465 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10472 /* Output a multiple immediate operation.
10473 OPERANDS is the vector of operands referred to in the output patterns.
10474 INSTR1 is the output pattern to use for the first constant.
10475 INSTR2 is the output pattern to use for subsequent constants.
10476 IMMED_OP is the index of the constant slot in OPERANDS.
10477 N is the constant value. */
10478 static const char *
10479 output_multi_immediate (rtx
*operands
, const char *instr1
, const char *instr2
,
10480 int immed_op
, HOST_WIDE_INT n
)
10482 #if HOST_BITS_PER_WIDE_INT > 32
10488 /* Quick and easy output. */
10489 operands
[immed_op
] = const0_rtx
;
10490 output_asm_insn (instr1
, operands
);
10495 const char * instr
= instr1
;
10497 /* Note that n is never zero here (which would give no output). */
10498 for (i
= 0; i
< 32; i
+= 2)
10502 operands
[immed_op
] = GEN_INT (n
& (255 << i
));
10503 output_asm_insn (instr
, operands
);
10513 /* Return the name of a shifter operation. */
10514 static const char *
10515 arm_shift_nmem(enum rtx_code code
)
10520 return ARM_LSL_NAME
;
10536 /* Return the appropriate ARM instruction for the operation code.
10537 The returned result should not be overwritten. OP is the rtx of the
10538 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10541 arithmetic_instr (rtx op
, int shift_first_arg
)
10543 switch (GET_CODE (op
))
10549 return shift_first_arg
? "rsb" : "sub";
10564 return arm_shift_nmem(GET_CODE(op
));
10567 gcc_unreachable ();
10571 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10572 for the operation code. The returned result should not be overwritten.
10573 OP is the rtx code of the shift.
10574 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
10576 static const char *
10577 shift_op (rtx op
, HOST_WIDE_INT
*amountp
)
10580 enum rtx_code code
= GET_CODE (op
);
10582 switch (GET_CODE (XEXP (op
, 1)))
10590 *amountp
= INTVAL (XEXP (op
, 1));
10594 gcc_unreachable ();
10600 gcc_assert (*amountp
!= -1);
10601 *amountp
= 32 - *amountp
;
10604 /* Fall through. */
10610 mnem
= arm_shift_nmem(code
);
10614 /* We never have to worry about the amount being other than a
10615 power of 2, since this case can never be reloaded from a reg. */
10616 gcc_assert (*amountp
!= -1);
10617 *amountp
= int_log2 (*amountp
);
10618 return ARM_LSL_NAME
;
10621 gcc_unreachable ();
10624 if (*amountp
!= -1)
10626 /* This is not 100% correct, but follows from the desire to merge
10627 multiplication by a power of 2 with the recognizer for a
10628 shift. >=32 is not a valid shift for "lsl", so we must try and
10629 output a shift that produces the correct arithmetical result.
10630 Using lsr #32 is identical except for the fact that the carry bit
10631 is not set correctly if we set the flags; but we never use the
10632 carry bit from such an operation, so we can ignore that. */
10633 if (code
== ROTATERT
)
10634 /* Rotate is just modulo 32. */
10636 else if (*amountp
!= (*amountp
& 31))
10638 if (code
== ASHIFT
)
10643 /* Shifts of 0 are no-ops. */
10651 /* Obtain the shift from the POWER of two. */
10653 static HOST_WIDE_INT
10654 int_log2 (HOST_WIDE_INT power
)
10656 HOST_WIDE_INT shift
= 0;
10658 while ((((HOST_WIDE_INT
) 1 << shift
) & power
) == 0)
10660 gcc_assert (shift
<= 31);
/* Output a .ascii pseudo-op, keeping track of lengths.  This is
   because /bin/as is horribly restrictive.  The judgement about
   whether or not each character is 'printable' (and can be output as
   is) or not (and must be printed with an octal escape) must be made
   with reference to the *host* character set -- the situation is
   similar to that discussed in the comments above pp_c_char in
   c-pretty-print.c.  */

#define MAX_ASCII_LEN 51

void
output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
{
  int i;
  int len_so_far = 0;

  fputs ("\t.ascii\t\"", stream);

  for (i = 0; i < len; i++)
    {
      int c = p[i];

      /* Start a fresh .ascii directive when the current one gets too
	 long for the assembler.  */
      if (len_so_far >= MAX_ASCII_LEN)
	{
	  fputs ("\"\n\t.ascii\t\"", stream);
	  len_so_far = 0;
	}

      if (ISPRINT (c))
	{
	  /* Backslash and double-quote must be escaped inside the
	     string.  */
	  if (c == '\\' || c == '\"')
	    {
	      putc ('\\', stream);
	      len_so_far++;
	    }
	  putc (c, stream);
	  len_so_far++;
	}
      else
	{
	  /* Non-printable characters are emitted as 3-digit octal
	     escapes (4 characters of output).  */
	  fprintf (stream, "\\%03o", c);
	  len_so_far += 4;
	}
    }

  fputs ("\"\n", stream);
}
10715 /* Compute the register save mask for registers 0 through 12
10716 inclusive. This code is used by arm_compute_save_reg_mask. */
10718 static unsigned long
10719 arm_compute_save_reg0_reg12_mask (void)
10721 unsigned long func_type
= arm_current_func_type ();
10722 unsigned long save_reg_mask
= 0;
10725 if (IS_INTERRUPT (func_type
))
10727 unsigned int max_reg
;
10728 /* Interrupt functions must not corrupt any registers,
10729 even call clobbered ones. If this is a leaf function
10730 we can just examine the registers used by the RTL, but
10731 otherwise we have to assume that whatever function is
10732 called might clobber anything, and so we have to save
10733 all the call-clobbered registers as well. */
10734 if (ARM_FUNC_TYPE (func_type
) == ARM_FT_FIQ
)
10735 /* FIQ handlers have registers r8 - r12 banked, so
10736 we only need to check r0 - r7, Normal ISRs only
10737 bank r14 and r15, so we must check up to r12.
10738 r13 is the stack pointer which is always preserved,
10739 so we do not need to consider it here. */
10744 for (reg
= 0; reg
<= max_reg
; reg
++)
10745 if (df_regs_ever_live_p (reg
)
10746 || (! current_function_is_leaf
&& call_used_regs
[reg
]))
10747 save_reg_mask
|= (1 << reg
);
10749 /* Also save the pic base register if necessary. */
10751 && !TARGET_SINGLE_PIC_BASE
10752 && arm_pic_register
!= INVALID_REGNUM
10753 && crtl
->uses_pic_offset_table
)
10754 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
10758 /* In the normal case we only need to save those registers
10759 which are call saved and which are used by this function. */
10760 for (reg
= 0; reg
<= 11; reg
++)
10761 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
10762 save_reg_mask
|= (1 << reg
);
10764 /* Handle the frame pointer as a special case. */
10765 if (frame_pointer_needed
)
10766 save_reg_mask
|= 1 << HARD_FRAME_POINTER_REGNUM
;
10768 /* If we aren't loading the PIC register,
10769 don't stack it even though it may be live. */
10771 && !TARGET_SINGLE_PIC_BASE
10772 && arm_pic_register
!= INVALID_REGNUM
10773 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM
)
10774 || crtl
->uses_pic_offset_table
))
10775 save_reg_mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
10777 /* The prologue will copy SP into R0, so save it. */
10778 if (IS_STACKALIGN (func_type
))
10779 save_reg_mask
|= 1;
10782 /* Save registers so the exception handler can modify them. */
10783 if (crtl
->calls_eh_return
)
10789 reg
= EH_RETURN_DATA_REGNO (i
);
10790 if (reg
== INVALID_REGNUM
)
10792 save_reg_mask
|= 1 << reg
;
10796 return save_reg_mask
;
10800 /* Compute a bit mask of which registers need to be
10801 saved on the stack for the current function.
10802 This is used by arm_get_frame_offsets, which may add extra registers. */
10804 static unsigned long
10805 arm_compute_save_reg_mask (void)
10807 unsigned int save_reg_mask
= 0;
10808 unsigned long func_type
= arm_current_func_type ();
10811 if (IS_NAKED (func_type
))
10812 /* This should never really happen. */
10815 /* If we are creating a stack frame, then we must save the frame pointer,
10816 IP (which will hold the old stack pointer), LR and the PC. */
10817 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
10819 (1 << ARM_HARD_FRAME_POINTER_REGNUM
)
10822 | (1 << PC_REGNUM
);
10824 /* Volatile functions do not return, so there
10825 is no need to save any other registers. */
10826 if (IS_VOLATILE (func_type
))
10827 return save_reg_mask
;
10829 save_reg_mask
|= arm_compute_save_reg0_reg12_mask ();
10831 /* Decide if we need to save the link register.
10832 Interrupt routines have their own banked link register,
10833 so they never need to save it.
10834 Otherwise if we do not use the link register we do not need to save
10835 it. If we are pushing other registers onto the stack however, we
10836 can save an instruction in the epilogue by pushing the link register
10837 now and then popping it back into the PC. This incurs extra memory
10838 accesses though, so we only do it when optimizing for size, and only
10839 if we know that we will not need a fancy return sequence. */
10840 if (df_regs_ever_live_p (LR_REGNUM
)
10843 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
10844 && !crtl
->calls_eh_return
))
10845 save_reg_mask
|= 1 << LR_REGNUM
;
10847 if (cfun
->machine
->lr_save_eliminated
)
10848 save_reg_mask
&= ~ (1 << LR_REGNUM
);
10850 if (TARGET_REALLY_IWMMXT
10851 && ((bit_count (save_reg_mask
)
10852 + ARM_NUM_INTS (crtl
->args
.pretend_args_size
)) % 2) != 0)
10854 /* The total number of registers that are going to be pushed
10855 onto the stack is odd. We need to ensure that the stack
10856 is 64-bit aligned before we start to save iWMMXt registers,
10857 and also before we start to create locals. (A local variable
10858 might be a double or long long which we will load/store using
10859 an iWMMXt instruction). Therefore we need to push another
10860 ARM register, so that the stack will be 64-bit aligned. We
10861 try to avoid using the arg registers (r0 -r3) as they might be
10862 used to pass values in a tail call. */
10863 for (reg
= 4; reg
<= 12; reg
++)
10864 if ((save_reg_mask
& (1 << reg
)) == 0)
10868 save_reg_mask
|= (1 << reg
);
10871 cfun
->machine
->sibcall_blocked
= 1;
10872 save_reg_mask
|= (1 << 3);
10876 /* We may need to push an additional register for use initializing the
10877 PIC base register. */
10878 if (TARGET_THUMB2
&& IS_NESTED (func_type
) && flag_pic
10879 && (save_reg_mask
& THUMB2_WORK_REGS
) == 0)
10881 reg
= thumb_find_work_register (1 << 4);
10882 if (!call_used_regs
[reg
])
10883 save_reg_mask
|= (1 << reg
);
10886 return save_reg_mask
;
10890 /* Compute a bit mask of which registers need to be
10891 saved on the stack for the current function. */
10892 static unsigned long
10893 thumb1_compute_save_reg_mask (void)
10895 unsigned long mask
;
10899 for (reg
= 0; reg
< 12; reg
++)
10900 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
10904 && !TARGET_SINGLE_PIC_BASE
10905 && arm_pic_register
!= INVALID_REGNUM
10906 && crtl
->uses_pic_offset_table
)
10907 mask
|= 1 << PIC_OFFSET_TABLE_REGNUM
;
10909 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10910 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
10911 mask
|= 1 << ARM_HARD_FRAME_POINTER_REGNUM
;
10913 /* LR will also be pushed if any lo regs are pushed. */
10914 if (mask
& 0xff || thumb_force_lr_save ())
10915 mask
|= (1 << LR_REGNUM
);
10917 /* Make sure we have a low work register if we need one.
10918 We will need one if we are going to push a high register,
10919 but we are not currently intending to push a low register. */
10920 if ((mask
& 0xff) == 0
10921 && ((mask
& 0x0f00) || TARGET_BACKTRACE
))
10923 /* Use thumb_find_work_register to choose which register
10924 we will use. If the register is live then we will
10925 have to push it. Use LAST_LO_REGNUM as our fallback
10926 choice for the register to select. */
10927 reg
= thumb_find_work_register (1 << LAST_LO_REGNUM
);
10928 /* Make sure the register returned by thumb_find_work_register is
10929 not part of the return value. */
10930 if (reg
* UNITS_PER_WORD
<= (unsigned) arm_size_return_regs ())
10931 reg
= LAST_LO_REGNUM
;
10933 if (! call_used_regs
[reg
])
10941 /* Return the number of bytes required to save VFP registers. */
10943 arm_get_vfp_saved_size (void)
10945 unsigned int regno
;
10950 /* Space for saved VFP registers. */
10951 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
10954 for (regno
= FIRST_VFP_REGNUM
;
10955 regno
< LAST_VFP_REGNUM
;
10958 if ((!df_regs_ever_live_p (regno
) || call_used_regs
[regno
])
10959 && (!df_regs_ever_live_p (regno
+ 1) || call_used_regs
[regno
+ 1]))
10963 /* Workaround ARM10 VFPr1 bug. */
10964 if (count
== 2 && !arm_arch6
)
10966 saved
+= count
* 8;
10975 if (count
== 2 && !arm_arch6
)
10977 saved
+= count
* 8;
10984 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10985 everything bar the final return instruction. */
10987 output_return_instruction (rtx operand
, int really_return
, int reverse
)
10989 char conditional
[10];
10992 unsigned long live_regs_mask
;
10993 unsigned long func_type
;
10994 arm_stack_offsets
*offsets
;
10996 func_type
= arm_current_func_type ();
10998 if (IS_NAKED (func_type
))
11001 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
11003 /* If this function was declared non-returning, and we have
11004 found a tail call, then we have to trust that the called
11005 function won't return. */
11010 /* Otherwise, trap an attempted return by aborting. */
11012 ops
[1] = gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)"
11014 assemble_external_libcall (ops
[1]);
11015 output_asm_insn (reverse
? "bl%D0\t%a1" : "bl%d0\t%a1", ops
);
11021 gcc_assert (!cfun
->calls_alloca
|| really_return
);
11023 sprintf (conditional
, "%%?%%%c0", reverse
? 'D' : 'd');
11025 return_used_this_function
= 1;
11027 offsets
= arm_get_frame_offsets ();
11028 live_regs_mask
= offsets
->saved_regs_mask
;
11030 if (live_regs_mask
)
11032 const char * return_reg
;
11034 /* If we do not have any special requirements for function exit
11035 (e.g. interworking) then we can load the return address
11036 directly into the PC. Otherwise we must load it into LR. */
11038 && (IS_INTERRUPT (func_type
) || !TARGET_INTERWORK
))
11039 return_reg
= reg_names
[PC_REGNUM
];
11041 return_reg
= reg_names
[LR_REGNUM
];
11043 if ((live_regs_mask
& (1 << IP_REGNUM
)) == (1 << IP_REGNUM
))
11045 /* There are three possible reasons for the IP register
11046 being saved. 1) a stack frame was created, in which case
11047 IP contains the old stack pointer, or 2) an ISR routine
11048 corrupted it, or 3) it was saved to align the stack on
11049 iWMMXt. In case 1, restore IP into SP, otherwise just
11051 if (frame_pointer_needed
)
11053 live_regs_mask
&= ~ (1 << IP_REGNUM
);
11054 live_regs_mask
|= (1 << SP_REGNUM
);
11057 gcc_assert (IS_INTERRUPT (func_type
) || TARGET_REALLY_IWMMXT
);
11060 /* On some ARM architectures it is faster to use LDR rather than
11061 LDM to load a single register. On other architectures, the
11062 cost is the same. In 26 bit mode, or for exception handlers,
11063 we have to use LDM to load the PC so that the CPSR is also
11065 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
11066 if (live_regs_mask
== (1U << reg
))
11069 if (reg
<= LAST_ARM_REGNUM
11070 && (reg
!= LR_REGNUM
11072 || ! IS_INTERRUPT (func_type
)))
11074 sprintf (instr
, "ldr%s\t%%|%s, [%%|sp], #4", conditional
,
11075 (reg
== LR_REGNUM
) ? return_reg
: reg_names
[reg
]);
11082 /* Generate the load multiple instruction to restore the
11083 registers. Note we can get here, even if
11084 frame_pointer_needed is true, but only if sp already
11085 points to the base of the saved core registers. */
11086 if (live_regs_mask
& (1 << SP_REGNUM
))
11088 unsigned HOST_WIDE_INT stack_adjust
;
11090 stack_adjust
= offsets
->outgoing_args
- offsets
->saved_regs
;
11091 gcc_assert (stack_adjust
== 0 || stack_adjust
== 4);
11093 if (stack_adjust
&& arm_arch5
&& TARGET_ARM
)
11094 sprintf (instr
, "ldm%sib\t%%|sp, {", conditional
);
11097 /* If we can't use ldmib (SA110 bug),
11098 then try to pop r3 instead. */
11100 live_regs_mask
|= 1 << 3;
11101 sprintf (instr
, "ldm%sfd\t%%|sp, {", conditional
);
11105 sprintf (instr
, "ldm%sfd\t%%|sp!, {", conditional
);
11107 p
= instr
+ strlen (instr
);
11109 for (reg
= 0; reg
<= SP_REGNUM
; reg
++)
11110 if (live_regs_mask
& (1 << reg
))
11112 int l
= strlen (reg_names
[reg
]);
11118 memcpy (p
, ", ", 2);
11122 memcpy (p
, "%|", 2);
11123 memcpy (p
+ 2, reg_names
[reg
], l
);
11127 if (live_regs_mask
& (1 << LR_REGNUM
))
11129 sprintf (p
, "%s%%|%s}", first
? "" : ", ", return_reg
);
11130 /* If returning from an interrupt, restore the CPSR. */
11131 if (IS_INTERRUPT (func_type
))
11138 output_asm_insn (instr
, & operand
);
11140 /* See if we need to generate an extra instruction to
11141 perform the actual function return. */
11143 && func_type
!= ARM_FT_INTERWORKED
11144 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0)
11146 /* The return has already been handled
11147 by loading the LR into the PC. */
11154 switch ((int) ARM_FUNC_TYPE (func_type
))
11158 /* ??? This is wrong for unified assembly syntax. */
11159 sprintf (instr
, "sub%ss\t%%|pc, %%|lr, #4", conditional
);
11162 case ARM_FT_INTERWORKED
:
11163 sprintf (instr
, "bx%s\t%%|lr", conditional
);
11166 case ARM_FT_EXCEPTION
:
11167 /* ??? This is wrong for unified assembly syntax. */
11168 sprintf (instr
, "mov%ss\t%%|pc, %%|lr", conditional
);
11172 /* Use bx if it's available. */
11173 if (arm_arch5
|| arm_arch4t
)
11174 sprintf (instr
, "bx%s\t%%|lr", conditional
);
11176 sprintf (instr
, "mov%s\t%%|pc, %%|lr", conditional
);
11180 output_asm_insn (instr
, & operand
);
11186 /* Write the function name into the code section, directly preceding
11187 the function prologue.
11189 Code will be output similar to this:
11191 .ascii "arm_poke_function_name", 0
11194 .word 0xff000000 + (t1 - t0)
11195 arm_poke_function_name
11197 stmfd sp!, {fp, ip, lr, pc}
11200 When performing a stack backtrace, code can inspect the value
11201 of 'pc' stored at 'fp' + 0. If the trace function then looks
11202 at location pc - 12 and the top 8 bits are set, then we know
11203 that there is a function name embedded immediately preceding this
11204 location and has length ((pc[-3]) & 0xff000000).
11206 We assume that pc is declared as a pointer to an unsigned long.
11208 It is of no benefit to output the function name if we are assembling
11209 a leaf function. These function types will not contain a stack
11210 backtrace structure, therefore it is not possible to determine the
11213 arm_poke_function_name (FILE *stream
, const char *name
)
11215 unsigned long alignlength
;
11216 unsigned long length
;
11219 length
= strlen (name
) + 1;
11220 alignlength
= ROUND_UP_WORD (length
);
11222 ASM_OUTPUT_ASCII (stream
, name
, length
);
11223 ASM_OUTPUT_ALIGN (stream
, 2);
11224 x
= GEN_INT ((unsigned HOST_WIDE_INT
) 0xff000000 + alignlength
);
11225 assemble_aligned_integer (UNITS_PER_WORD
, x
);
11228 /* Place some comments into the assembler stream
11229 describing the current function. */
11231 arm_output_function_prologue (FILE *f
, HOST_WIDE_INT frame_size
)
11233 unsigned long func_type
;
11237 thumb1_output_function_prologue (f
, frame_size
);
11241 /* Sanity check. */
11242 gcc_assert (!arm_ccfsm_state
&& !arm_target_insn
);
11244 func_type
= arm_current_func_type ();
11246 switch ((int) ARM_FUNC_TYPE (func_type
))
11249 case ARM_FT_NORMAL
:
11251 case ARM_FT_INTERWORKED
:
11252 asm_fprintf (f
, "\t%@ Function supports interworking.\n");
11255 asm_fprintf (f
, "\t%@ Interrupt Service Routine.\n");
11258 asm_fprintf (f
, "\t%@ Fast Interrupt Service Routine.\n");
11260 case ARM_FT_EXCEPTION
:
11261 asm_fprintf (f
, "\t%@ ARM Exception Handler.\n");
11265 if (IS_NAKED (func_type
))
11266 asm_fprintf (f
, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11268 if (IS_VOLATILE (func_type
))
11269 asm_fprintf (f
, "\t%@ Volatile: function does not return.\n");
11271 if (IS_NESTED (func_type
))
11272 asm_fprintf (f
, "\t%@ Nested: function declared inside another function.\n");
11273 if (IS_STACKALIGN (func_type
))
11274 asm_fprintf (f
, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11276 asm_fprintf (f
, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11278 crtl
->args
.pretend_args_size
, frame_size
);
11280 asm_fprintf (f
, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11281 frame_pointer_needed
,
11282 cfun
->machine
->uses_anonymous_args
);
11284 if (cfun
->machine
->lr_save_eliminated
)
11285 asm_fprintf (f
, "\t%@ link register save eliminated.\n");
11287 if (crtl
->calls_eh_return
)
11288 asm_fprintf (f
, "\t@ Calls __builtin_eh_return.\n");
11290 return_used_this_function
= 0;
11294 arm_output_epilogue (rtx sibling
)
11297 unsigned long saved_regs_mask
;
11298 unsigned long func_type
;
11299 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11300 frame that is $fp + 4 for a non-variadic function. */
11301 int floats_offset
= 0;
11303 FILE * f
= asm_out_file
;
11304 unsigned int lrm_count
= 0;
11305 int really_return
= (sibling
== NULL
);
11307 arm_stack_offsets
*offsets
;
11309 /* If we have already generated the return instruction
11310 then it is futile to generate anything else. */
11311 if (use_return_insn (FALSE
, sibling
) && return_used_this_function
)
11314 func_type
= arm_current_func_type ();
11316 if (IS_NAKED (func_type
))
11317 /* Naked functions don't have epilogues. */
11320 if (IS_VOLATILE (func_type
) && TARGET_ABORT_NORETURN
)
11324 /* A volatile function should never return. Call abort. */
11325 op
= gen_rtx_SYMBOL_REF (Pmode
, NEED_PLT_RELOC
? "abort(PLT)" : "abort");
11326 assemble_external_libcall (op
);
11327 output_asm_insn ("bl\t%a0", &op
);
11332 /* If we are throwing an exception, then we really must be doing a
11333 return, so we can't tail-call. */
11334 gcc_assert (!crtl
->calls_eh_return
|| really_return
);
11336 offsets
= arm_get_frame_offsets ();
11337 saved_regs_mask
= offsets
->saved_regs_mask
;
11340 lrm_count
= bit_count (saved_regs_mask
);
11342 floats_offset
= offsets
->saved_args
;
11343 /* Compute how far away the floats will be. */
11344 for (reg
= 0; reg
<= LAST_ARM_REGNUM
; reg
++)
11345 if (saved_regs_mask
& (1 << reg
))
11346 floats_offset
+= 4;
11348 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
11350 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11351 int vfp_offset
= offsets
->frame
;
11353 if (arm_fpu_arch
== FPUTYPE_FPA_EMU2
)
11355 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
11356 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
11358 floats_offset
+= 12;
11359 asm_fprintf (f
, "\tldfe\t%r, [%r, #-%d]\n",
11360 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
11365 start_reg
= LAST_FPA_REGNUM
;
11367 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
11369 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
11371 floats_offset
+= 12;
11373 /* We can't unstack more than four registers at once. */
11374 if (start_reg
- reg
== 3)
11376 asm_fprintf (f
, "\tlfm\t%r, 4, [%r, #-%d]\n",
11377 reg
, FP_REGNUM
, floats_offset
- vfp_offset
);
11378 start_reg
= reg
- 1;
11383 if (reg
!= start_reg
)
11384 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
11385 reg
+ 1, start_reg
- reg
,
11386 FP_REGNUM
, floats_offset
- vfp_offset
);
11387 start_reg
= reg
- 1;
11391 /* Just in case the last register checked also needs unstacking. */
11392 if (reg
!= start_reg
)
11393 asm_fprintf (f
, "\tlfm\t%r, %d, [%r, #-%d]\n",
11394 reg
+ 1, start_reg
- reg
,
11395 FP_REGNUM
, floats_offset
- vfp_offset
);
11398 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
11402 /* The fldmd insns do not have base+offset addressing
11403 modes, so we use IP to hold the address. */
11404 saved_size
= arm_get_vfp_saved_size ();
11406 if (saved_size
> 0)
11408 floats_offset
+= saved_size
;
11409 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", IP_REGNUM
,
11410 FP_REGNUM
, floats_offset
- vfp_offset
);
11412 start_reg
= FIRST_VFP_REGNUM
;
11413 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
11415 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
11416 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
11418 if (start_reg
!= reg
)
11419 vfp_output_fldmd (f
, IP_REGNUM
,
11420 (start_reg
- FIRST_VFP_REGNUM
) / 2,
11421 (reg
- start_reg
) / 2);
11422 start_reg
= reg
+ 2;
11425 if (start_reg
!= reg
)
11426 vfp_output_fldmd (f
, IP_REGNUM
,
11427 (start_reg
- FIRST_VFP_REGNUM
) / 2,
11428 (reg
- start_reg
) / 2);
11433 /* The frame pointer is guaranteed to be non-double-word aligned.
11434 This is because it is set to (old_stack_pointer - 4) and the
11435 old_stack_pointer was double word aligned. Thus the offset to
11436 the iWMMXt registers to be loaded must also be non-double-word
11437 sized, so that the resultant address *is* double-word aligned.
11438 We can ignore floats_offset since that was already included in
11439 the live_regs_mask. */
11440 lrm_count
+= (lrm_count
% 2 ? 2 : 1);
11442 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
11443 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
11445 asm_fprintf (f
, "\twldrd\t%r, [%r, #-%d]\n",
11446 reg
, FP_REGNUM
, lrm_count
* 4);
11451 /* saved_regs_mask should contain the IP, which at the time of stack
11452 frame generation actually contains the old stack pointer. So a
11453 quick way to unwind the stack is just pop the IP register directly
11454 into the stack pointer. */
11455 gcc_assert (saved_regs_mask
& (1 << IP_REGNUM
));
11456 saved_regs_mask
&= ~ (1 << IP_REGNUM
);
11457 saved_regs_mask
|= (1 << SP_REGNUM
);
11459 /* There are two registers left in saved_regs_mask - LR and PC. We
11460 only need to restore the LR register (the return address), but to
11461 save time we can load it directly into the PC, unless we need a
11462 special function exit sequence, or we are not really returning. */
11464 && ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
11465 && !crtl
->calls_eh_return
)
11466 /* Delete the LR from the register mask, so that the LR on
11467 the stack is loaded into the PC in the register mask. */
11468 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
11470 saved_regs_mask
&= ~ (1 << PC_REGNUM
);
11472 /* We must use SP as the base register, because SP is one of the
11473 registers being restored. If an interrupt or page fault
11474 happens in the ldm instruction, the SP might or might not
11475 have been restored. That would be bad, as then SP will no
11476 longer indicate the safe area of stack, and we can get stack
11477 corruption. Using SP as the base register means that it will
11478 be reset correctly to the original value, should an interrupt
11479 occur. If the stack pointer already points at the right
11480 place, then omit the subtraction. */
11481 if (offsets
->outgoing_args
!= (1 + (int) bit_count (saved_regs_mask
))
11482 || cfun
->calls_alloca
)
11483 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n", SP_REGNUM
, FP_REGNUM
,
11484 4 * bit_count (saved_regs_mask
));
11485 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
, 0);
11487 if (IS_INTERRUPT (func_type
))
11488 /* Interrupt handlers will have pushed the
11489 IP onto the stack, so restore it now. */
11490 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, 1 << IP_REGNUM
, 0);
11494 /* This branch is executed for ARM mode (non-apcs frames) and
11495 Thumb-2 mode. Frame layout is essentially the same for those
11496 cases, except that in ARM mode frame pointer points to the
11497 first saved register, while in Thumb-2 mode the frame pointer points
11498 to the last saved register.
11500 It is possible to make frame pointer point to last saved
11501 register in both cases, and remove some conditionals below.
11502 That means that fp setup in prologue would be just "mov fp, sp"
11503 and sp restore in epilogue would be just "mov sp, fp", whereas
11504 now we have to use add/sub in those cases. However, the value
11505 of that would be marginal, as both mov and add/sub are 32-bit
11506 in ARM mode, and it would require extra conditionals
11507 in arm_expand_prologue to distingish ARM-apcs-frame case
11508 (where frame pointer is required to point at first register)
11509 and ARM-non-apcs-frame. Therefore, such change is postponed
11510 until real need arise. */
11511 HOST_WIDE_INT amount
;
11513 /* Restore stack pointer if necessary. */
11514 if (TARGET_ARM
&& frame_pointer_needed
)
11516 operands
[0] = stack_pointer_rtx
;
11517 operands
[1] = hard_frame_pointer_rtx
;
11519 operands
[2] = GEN_INT (offsets
->frame
- offsets
->saved_regs
);
11520 output_add_immediate (operands
);
11524 if (frame_pointer_needed
)
11526 /* For Thumb-2 restore sp from the frame pointer.
11527 Operand restrictions mean we have to incrememnt FP, then copy
11529 amount
= offsets
->locals_base
- offsets
->saved_regs
;
11530 operands
[0] = hard_frame_pointer_rtx
;
11534 unsigned long count
;
11535 operands
[0] = stack_pointer_rtx
;
11536 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
11537 /* pop call clobbered registers if it avoids a
11538 separate stack adjustment. */
11539 count
= offsets
->saved_regs
- offsets
->saved_args
;
11542 && !crtl
->calls_eh_return
11543 && bit_count(saved_regs_mask
) * 4 == count
11544 && !IS_INTERRUPT (func_type
)
11545 && !crtl
->tail_call_emit
)
11547 unsigned long mask
;
11548 mask
= (1 << (arm_size_return_regs() / 4)) - 1;
11550 mask
&= ~saved_regs_mask
;
11552 while (bit_count (mask
) * 4 > amount
)
11554 while ((mask
& (1 << reg
)) == 0)
11556 mask
&= ~(1 << reg
);
11558 if (bit_count (mask
) * 4 == amount
) {
11560 saved_regs_mask
|= mask
;
11567 operands
[1] = operands
[0];
11568 operands
[2] = GEN_INT (amount
);
11569 output_add_immediate (operands
);
11571 if (frame_pointer_needed
)
11572 asm_fprintf (f
, "\tmov\t%r, %r\n",
11573 SP_REGNUM
, HARD_FRAME_POINTER_REGNUM
);
11576 if (arm_fpu_arch
== FPUTYPE_FPA_EMU2
)
11578 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
11579 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
11580 asm_fprintf (f
, "\tldfe\t%r, [%r], #12\n",
11585 start_reg
= FIRST_FPA_REGNUM
;
11587 for (reg
= FIRST_FPA_REGNUM
; reg
<= LAST_FPA_REGNUM
; reg
++)
11589 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
11591 if (reg
- start_reg
== 3)
11593 asm_fprintf (f
, "\tlfmfd\t%r, 4, [%r]!\n",
11594 start_reg
, SP_REGNUM
);
11595 start_reg
= reg
+ 1;
11600 if (reg
!= start_reg
)
11601 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
11602 start_reg
, reg
- start_reg
,
11605 start_reg
= reg
+ 1;
11609 /* Just in case the last register checked also needs unstacking. */
11610 if (reg
!= start_reg
)
11611 asm_fprintf (f
, "\tlfmfd\t%r, %d, [%r]!\n",
11612 start_reg
, reg
- start_reg
, SP_REGNUM
);
11615 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
11617 start_reg
= FIRST_VFP_REGNUM
;
11618 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
11620 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
11621 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
11623 if (start_reg
!= reg
)
11624 vfp_output_fldmd (f
, SP_REGNUM
,
11625 (start_reg
- FIRST_VFP_REGNUM
) / 2,
11626 (reg
- start_reg
) / 2);
11627 start_reg
= reg
+ 2;
11630 if (start_reg
!= reg
)
11631 vfp_output_fldmd (f
, SP_REGNUM
,
11632 (start_reg
- FIRST_VFP_REGNUM
) / 2,
11633 (reg
- start_reg
) / 2);
11636 for (reg
= FIRST_IWMMXT_REGNUM
; reg
<= LAST_IWMMXT_REGNUM
; reg
++)
11637 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
11638 asm_fprintf (f
, "\twldrd\t%r, [%r], #8\n", reg
, SP_REGNUM
);
11640 /* If we can, restore the LR into the PC. */
11641 if (ARM_FUNC_TYPE (func_type
) != ARM_FT_INTERWORKED
11642 && (TARGET_ARM
|| ARM_FUNC_TYPE (func_type
) == ARM_FT_NORMAL
)
11643 && !IS_STACKALIGN (func_type
)
11645 && crtl
->args
.pretend_args_size
== 0
11646 && saved_regs_mask
& (1 << LR_REGNUM
)
11647 && !crtl
->calls_eh_return
)
11649 saved_regs_mask
&= ~ (1 << LR_REGNUM
);
11650 saved_regs_mask
|= (1 << PC_REGNUM
);
11651 rfe
= IS_INTERRUPT (func_type
);
11656 /* Load the registers off the stack. If we only have one register
11657 to load use the LDR instruction - it is faster. For Thumb-2
11658 always use pop and the assembler will pick the best instruction.*/
11659 if (TARGET_ARM
&& saved_regs_mask
== (1 << LR_REGNUM
)
11660 && !IS_INTERRUPT(func_type
))
11662 asm_fprintf (f
, "\tldr\t%r, [%r], #4\n", LR_REGNUM
, SP_REGNUM
);
11664 else if (saved_regs_mask
)
11666 if (saved_regs_mask
& (1 << SP_REGNUM
))
11667 /* Note - write back to the stack register is not enabled
11668 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11669 in the list of registers and if we add writeback the
11670 instruction becomes UNPREDICTABLE. */
11671 print_multi_reg (f
, "ldmfd\t%r, ", SP_REGNUM
, saved_regs_mask
,
11673 else if (TARGET_ARM
)
11674 print_multi_reg (f
, "ldmfd\t%r!, ", SP_REGNUM
, saved_regs_mask
,
11677 print_multi_reg (f
, "pop\t", SP_REGNUM
, saved_regs_mask
, 0);
11680 if (crtl
->args
.pretend_args_size
)
11682 /* Unwind the pre-pushed regs. */
11683 operands
[0] = operands
[1] = stack_pointer_rtx
;
11684 operands
[2] = GEN_INT (crtl
->args
.pretend_args_size
);
11685 output_add_immediate (operands
);
11689 /* We may have already restored PC directly from the stack. */
11690 if (!really_return
|| saved_regs_mask
& (1 << PC_REGNUM
))
11693 /* Stack adjustment for exception handler. */
11694 if (crtl
->calls_eh_return
)
11695 asm_fprintf (f
, "\tadd\t%r, %r, %r\n", SP_REGNUM
, SP_REGNUM
,
11696 ARM_EH_STACKADJ_REGNUM
);
11698 /* Generate the return instruction. */
11699 switch ((int) ARM_FUNC_TYPE (func_type
))
11703 asm_fprintf (f
, "\tsubs\t%r, %r, #4\n", PC_REGNUM
, LR_REGNUM
);
11706 case ARM_FT_EXCEPTION
:
11707 asm_fprintf (f
, "\tmovs\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
11710 case ARM_FT_INTERWORKED
:
11711 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
11715 if (IS_STACKALIGN (func_type
))
11717 /* See comment in arm_expand_prologue. */
11718 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, 0);
11720 if (arm_arch5
|| arm_arch4t
)
11721 asm_fprintf (f
, "\tbx\t%r\n", LR_REGNUM
);
11723 asm_fprintf (f
, "\tmov\t%r, %r\n", PC_REGNUM
, LR_REGNUM
);
11731 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED
,
11732 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED
)
11734 arm_stack_offsets
*offsets
;
11740 /* Emit any call-via-reg trampolines that are needed for v4t support
11741 of call_reg and call_value_reg type insns. */
11742 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
11744 rtx label
= cfun
->machine
->call_via
[regno
];
11748 switch_to_section (function_section (current_function_decl
));
11749 targetm
.asm_out
.internal_label (asm_out_file
, "L",
11750 CODE_LABEL_NUMBER (label
));
11751 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
11755 /* ??? Probably not safe to set this here, since it assumes that a
11756 function will be emitted as assembly immediately after we generate
11757 RTL for it. This does not happen for inline functions. */
11758 return_used_this_function
= 0;
11760 else /* TARGET_32BIT */
11762 /* We need to take into account any stack-frame rounding. */
11763 offsets
= arm_get_frame_offsets ();
11765 gcc_assert (!use_return_insn (FALSE
, NULL
)
11766 || !return_used_this_function
11767 || offsets
->saved_regs
== offsets
->outgoing_args
11768 || frame_pointer_needed
);
11770 /* Reset the ARM-specific per-function variables. */
11771 after_arm_reorg
= 0;
11775 /* Generate and emit an insn that we will recognize as a push_multi.
11776 Unfortunately, since this insn does not reflect very well the actual
11777 semantics of the operation, we need to annotate the insn for the benefit
11778 of DWARF2 frame unwind information. */
11780 emit_multi_reg_push (unsigned long mask
)
11783 int num_dwarf_regs
;
11787 int dwarf_par_index
;
11790 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
11791 if (mask
& (1 << i
))
11794 gcc_assert (num_regs
&& num_regs
<= 16);
11796 /* We don't record the PC in the dwarf frame information. */
11797 num_dwarf_regs
= num_regs
;
11798 if (mask
& (1 << PC_REGNUM
))
11801 /* For the body of the insn we are going to generate an UNSPEC in
11802 parallel with several USEs. This allows the insn to be recognized
11803 by the push_multi pattern in the arm.md file. The insn looks
11804 something like this:
11807 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11808 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11809 (use (reg:SI 11 fp))
11810 (use (reg:SI 12 ip))
11811 (use (reg:SI 14 lr))
11812 (use (reg:SI 15 pc))
11815 For the frame note however, we try to be more explicit and actually
11816 show each register being stored into the stack frame, plus a (single)
11817 decrement of the stack pointer. We do it this way in order to be
11818 friendly to the stack unwinding code, which only wants to see a single
11819 stack decrement per instruction. The RTL we generate for the note looks
11820 something like this:
11823 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11824 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11825 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11826 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11827 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11830 This sequence is used both by the code to support stack unwinding for
11831 exceptions handlers and the code to generate dwarf2 frame debugging. */
11833 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (num_regs
));
11834 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (num_dwarf_regs
+ 1));
11835 dwarf_par_index
= 1;
11837 for (i
= 0; i
<= LAST_ARM_REGNUM
; i
++)
11839 if (mask
& (1 << i
))
11841 reg
= gen_rtx_REG (SImode
, i
);
11843 XVECEXP (par
, 0, 0)
11844 = gen_rtx_SET (VOIDmode
,
11845 gen_frame_mem (BLKmode
,
11846 gen_rtx_PRE_DEC (BLKmode
,
11847 stack_pointer_rtx
)),
11848 gen_rtx_UNSPEC (BLKmode
,
11849 gen_rtvec (1, reg
),
11850 UNSPEC_PUSH_MULT
));
11852 if (i
!= PC_REGNUM
)
11854 tmp
= gen_rtx_SET (VOIDmode
,
11855 gen_frame_mem (SImode
, stack_pointer_rtx
),
11857 RTX_FRAME_RELATED_P (tmp
) = 1;
11858 XVECEXP (dwarf
, 0, dwarf_par_index
) = tmp
;
11866 for (j
= 1, i
++; j
< num_regs
; i
++)
11868 if (mask
& (1 << i
))
11870 reg
= gen_rtx_REG (SImode
, i
);
11872 XVECEXP (par
, 0, j
) = gen_rtx_USE (VOIDmode
, reg
);
11874 if (i
!= PC_REGNUM
)
11877 = gen_rtx_SET (VOIDmode
,
11878 gen_frame_mem (SImode
,
11879 plus_constant (stack_pointer_rtx
,
11882 RTX_FRAME_RELATED_P (tmp
) = 1;
11883 XVECEXP (dwarf
, 0, dwarf_par_index
++) = tmp
;
11890 par
= emit_insn (par
);
11892 tmp
= gen_rtx_SET (VOIDmode
,
11894 plus_constant (stack_pointer_rtx
, -4 * num_regs
));
11895 RTX_FRAME_RELATED_P (tmp
) = 1;
11896 XVECEXP (dwarf
, 0, 0) = tmp
;
11898 REG_NOTES (par
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, dwarf
,
11903 /* Calculate the size of the return value that is passed in registers. */
11905 arm_size_return_regs (void)
11907 enum machine_mode mode
;
11909 if (crtl
->return_rtx
!= 0)
11910 mode
= GET_MODE (crtl
->return_rtx
);
11912 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
11914 return GET_MODE_SIZE (mode
);
11918 emit_sfm (int base_reg
, int count
)
11925 par
= gen_rtx_PARALLEL (VOIDmode
, rtvec_alloc (count
));
11926 dwarf
= gen_rtx_SEQUENCE (VOIDmode
, rtvec_alloc (count
+ 1));
11928 reg
= gen_rtx_REG (XFmode
, base_reg
++);
11930 XVECEXP (par
, 0, 0)
11931 = gen_rtx_SET (VOIDmode
,
11932 gen_frame_mem (BLKmode
,
11933 gen_rtx_PRE_DEC (BLKmode
,
11934 stack_pointer_rtx
)),
11935 gen_rtx_UNSPEC (BLKmode
,
11936 gen_rtvec (1, reg
),
11937 UNSPEC_PUSH_MULT
));
11938 tmp
= gen_rtx_SET (VOIDmode
,
11939 gen_frame_mem (XFmode
, stack_pointer_rtx
), reg
);
11940 RTX_FRAME_RELATED_P (tmp
) = 1;
11941 XVECEXP (dwarf
, 0, 1) = tmp
;
11943 for (i
= 1; i
< count
; i
++)
11945 reg
= gen_rtx_REG (XFmode
, base_reg
++);
11946 XVECEXP (par
, 0, i
) = gen_rtx_USE (VOIDmode
, reg
);
11948 tmp
= gen_rtx_SET (VOIDmode
,
11949 gen_frame_mem (XFmode
,
11950 plus_constant (stack_pointer_rtx
,
11953 RTX_FRAME_RELATED_P (tmp
) = 1;
11954 XVECEXP (dwarf
, 0, i
+ 1) = tmp
;
11957 tmp
= gen_rtx_SET (VOIDmode
,
11959 plus_constant (stack_pointer_rtx
, -12 * count
));
11961 RTX_FRAME_RELATED_P (tmp
) = 1;
11962 XVECEXP (dwarf
, 0, 0) = tmp
;
11964 par
= emit_insn (par
);
11965 REG_NOTES (par
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, dwarf
,
11971 /* Return true if the current function needs to save/restore LR. */
11974 thumb_force_lr_save (void)
11976 return !cfun
->machine
->lr_save_eliminated
11977 && (!leaf_function_p ()
11978 || thumb_far_jump_used_p ()
11979 || df_regs_ever_live_p (LR_REGNUM
));
11983 /* Compute the distance from register FROM to register TO.
11984 These can be the arg pointer (26), the soft frame pointer (25),
11985 the stack pointer (13) or the hard frame pointer (11).
11986 In thumb mode r7 is used as the soft frame pointer, if needed.
11987 Typical stack layout looks like this:
11989 old stack pointer -> | |
11992 | | saved arguments for
11993 | | vararg functions
11996 hard FP & arg pointer -> | | \
12004 soft frame pointer -> | | /
12009 locals base pointer -> | | /
12014 current stack pointer -> | | /
12017 For a given function some or all of these stack components
12018 may not be needed, giving rise to the possibility of
12019 eliminating some of the registers.
12021 The values returned by this function must reflect the behavior
12022 of arm_expand_prologue() and arm_compute_save_reg_mask().
12024 The sign of the number returned reflects the direction of stack
12025 growth, so the values are positive for all eliminations except
12026 from the soft frame pointer to the hard frame pointer.
12028 SFP may point just inside the local variables block to ensure correct
12032 /* Calculate stack offsets. These are used to calculate register elimination
12033 offsets and in prologue/epilogue code. Also calculates which registers
12034 should be saved. */
12036 static arm_stack_offsets
*
12037 arm_get_frame_offsets (void)
12039 struct arm_stack_offsets
*offsets
;
12040 unsigned long func_type
;
12044 HOST_WIDE_INT frame_size
;
12047 offsets
= &cfun
->machine
->stack_offsets
;
12049 /* We need to know if we are a leaf function. Unfortunately, it
12050 is possible to be called after start_sequence has been called,
12051 which causes get_insns to return the insns for the sequence,
12052 not the function, which will cause leaf_function_p to return
12053 the incorrect result.
12055 to know about leaf functions once reload has completed, and the
12056 frame size cannot be changed after that time, so we can safely
12057 use the cached value. */
12059 if (reload_completed
)
12062 /* Initially this is the size of the local variables. It will translated
12063 into an offset once we have determined the size of preceding data. */
12064 frame_size
= ROUND_UP_WORD (get_frame_size ());
12066 leaf
= leaf_function_p ();
12068 /* Space for variadic functions. */
12069 offsets
->saved_args
= crtl
->args
.pretend_args_size
;
12071 /* In Thumb mode this is incorrect, but never used. */
12072 offsets
->frame
= offsets
->saved_args
+ (frame_pointer_needed
? 4 : 0);
12076 unsigned int regno
;
12078 offsets
->saved_regs_mask
= arm_compute_save_reg_mask ();
12079 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
12080 saved
= core_saved
;
12082 /* We know that SP will be doubleword aligned on entry, and we must
12083 preserve that condition at any subroutine call. We also require the
12084 soft frame pointer to be doubleword aligned. */
12086 if (TARGET_REALLY_IWMMXT
)
12088 /* Check for the call-saved iWMMXt registers. */
12089 for (regno
= FIRST_IWMMXT_REGNUM
;
12090 regno
<= LAST_IWMMXT_REGNUM
;
12092 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
12096 func_type
= arm_current_func_type ();
12097 if (! IS_VOLATILE (func_type
))
12099 /* Space for saved FPA registers. */
12100 for (regno
= FIRST_FPA_REGNUM
; regno
<= LAST_FPA_REGNUM
; regno
++)
12101 if (df_regs_ever_live_p (regno
) && ! call_used_regs
[regno
])
12104 /* Space for saved VFP registers. */
12105 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
12106 saved
+= arm_get_vfp_saved_size ();
12109 else /* TARGET_THUMB1 */
12111 offsets
->saved_regs_mask
= thumb1_compute_save_reg_mask ();
12112 core_saved
= bit_count (offsets
->saved_regs_mask
) * 4;
12113 saved
= core_saved
;
12114 if (TARGET_BACKTRACE
)
12118 /* Saved registers include the stack frame. */
12119 offsets
->saved_regs
= offsets
->saved_args
+ saved
;
12120 offsets
->soft_frame
= offsets
->saved_regs
+ CALLER_INTERWORKING_SLOT_SIZE
;
12121 /* A leaf function does not need any stack alignment if it has nothing
12123 if (leaf
&& frame_size
== 0)
12125 offsets
->outgoing_args
= offsets
->soft_frame
;
12126 offsets
->locals_base
= offsets
->soft_frame
;
12130 /* Ensure SFP has the correct alignment. */
12131 if (ARM_DOUBLEWORD_ALIGN
12132 && (offsets
->soft_frame
& 7))
12134 offsets
->soft_frame
+= 4;
12135 /* Try to align stack by pushing an extra reg. Don't bother doing this
12136 when there is a stack frame as the alignment will be rolled into
12137 the normal stack adjustment. */
12138 if (frame_size
+ crtl
->outgoing_args_size
== 0)
12142 for (i
= 4; i
<= (TARGET_THUMB1
? LAST_LO_REGNUM
: 11); i
++)
12144 if ((offsets
->saved_regs_mask
& (1 << i
)) == 0)
12151 if (reg
== -1 && arm_size_return_regs () <= 12
12152 && !crtl
->tail_call_emit
)
12154 /* Push/pop an argument register (r3) if all callee saved
12155 registers are already being pushed. */
12161 offsets
->saved_regs
+= 4;
12162 offsets
->saved_regs_mask
|= (1 << reg
);
12167 offsets
->locals_base
= offsets
->soft_frame
+ frame_size
;
12168 offsets
->outgoing_args
= (offsets
->locals_base
12169 + crtl
->outgoing_args_size
);
12171 if (ARM_DOUBLEWORD_ALIGN
)
12173 /* Ensure SP remains doubleword aligned. */
12174 if (offsets
->outgoing_args
& 7)
12175 offsets
->outgoing_args
+= 4;
12176 gcc_assert (!(offsets
->outgoing_args
& 7));
12183 /* Calculate the relative offsets for the different stack pointers. Positive
12184 offsets are in the direction of stack growth. */
12187 arm_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
12189 arm_stack_offsets
*offsets
;
12191 offsets
= arm_get_frame_offsets ();
12193 /* OK, now we have enough information to compute the distances.
12194 There must be an entry in these switch tables for each pair
12195 of registers in ELIMINABLE_REGS, even if some of the entries
12196 seem to be redundant or useless. */
12199 case ARG_POINTER_REGNUM
:
12202 case THUMB_HARD_FRAME_POINTER_REGNUM
:
12205 case FRAME_POINTER_REGNUM
:
12206 /* This is the reverse of the soft frame pointer
12207 to hard frame pointer elimination below. */
12208 return offsets
->soft_frame
- offsets
->saved_args
;
12210 case ARM_HARD_FRAME_POINTER_REGNUM
:
12211 /* If there is no stack frame then the hard
12212 frame pointer and the arg pointer coincide. */
12213 if (offsets
->frame
== offsets
->saved_regs
)
12215 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12216 return (frame_pointer_needed
12217 && cfun
->static_chain_decl
!= NULL
12218 && ! cfun
->machine
->uses_anonymous_args
) ? 4 : 0;
12220 case STACK_POINTER_REGNUM
:
12221 /* If nothing has been pushed on the stack at all
12222 then this will return -4. This *is* correct! */
12223 return offsets
->outgoing_args
- (offsets
->saved_args
+ 4);
12226 gcc_unreachable ();
12228 gcc_unreachable ();
12230 case FRAME_POINTER_REGNUM
:
12233 case THUMB_HARD_FRAME_POINTER_REGNUM
:
12236 case ARM_HARD_FRAME_POINTER_REGNUM
:
12237 /* The hard frame pointer points to the top entry in the
12238 stack frame. The soft frame pointer to the bottom entry
12239 in the stack frame. If there is no stack frame at all,
12240 then they are identical. */
12242 return offsets
->frame
- offsets
->soft_frame
;
12244 case STACK_POINTER_REGNUM
:
12245 return offsets
->outgoing_args
- offsets
->soft_frame
;
12248 gcc_unreachable ();
12250 gcc_unreachable ();
12253 /* You cannot eliminate from the stack pointer.
12254 In theory you could eliminate from the hard frame
12255 pointer to the stack pointer, but this will never
12256 happen, since if a stack frame is not needed the
12257 hard frame pointer will never be used. */
12258 gcc_unreachable ();
12263 /* Emit RTL to save coprocessor registers on function entry. Returns the
12264 number of bytes pushed. */
12267 arm_save_coproc_regs(void)
12269 int saved_size
= 0;
12271 unsigned start_reg
;
12274 for (reg
= LAST_IWMMXT_REGNUM
; reg
>= FIRST_IWMMXT_REGNUM
; reg
--)
12275 if (df_regs_ever_live_p (reg
) && ! call_used_regs
[reg
])
12277 insn
= gen_rtx_PRE_DEC (V2SImode
, stack_pointer_rtx
);
12278 insn
= gen_rtx_MEM (V2SImode
, insn
);
12279 insn
= emit_set_insn (insn
, gen_rtx_REG (V2SImode
, reg
));
12280 RTX_FRAME_RELATED_P (insn
) = 1;
12284 /* Save any floating point call-saved registers used by this
12286 if (arm_fpu_arch
== FPUTYPE_FPA_EMU2
)
12288 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
12289 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
12291 insn
= gen_rtx_PRE_DEC (XFmode
, stack_pointer_rtx
);
12292 insn
= gen_rtx_MEM (XFmode
, insn
);
12293 insn
= emit_set_insn (insn
, gen_rtx_REG (XFmode
, reg
));
12294 RTX_FRAME_RELATED_P (insn
) = 1;
12300 start_reg
= LAST_FPA_REGNUM
;
12302 for (reg
= LAST_FPA_REGNUM
; reg
>= FIRST_FPA_REGNUM
; reg
--)
12304 if (df_regs_ever_live_p (reg
) && !call_used_regs
[reg
])
12306 if (start_reg
- reg
== 3)
12308 insn
= emit_sfm (reg
, 4);
12309 RTX_FRAME_RELATED_P (insn
) = 1;
12311 start_reg
= reg
- 1;
12316 if (start_reg
!= reg
)
12318 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
12319 RTX_FRAME_RELATED_P (insn
) = 1;
12320 saved_size
+= (start_reg
- reg
) * 12;
12322 start_reg
= reg
- 1;
12326 if (start_reg
!= reg
)
12328 insn
= emit_sfm (reg
+ 1, start_reg
- reg
);
12329 saved_size
+= (start_reg
- reg
) * 12;
12330 RTX_FRAME_RELATED_P (insn
) = 1;
12333 if (TARGET_HARD_FLOAT
&& TARGET_VFP
)
12335 start_reg
= FIRST_VFP_REGNUM
;
12337 for (reg
= FIRST_VFP_REGNUM
; reg
< LAST_VFP_REGNUM
; reg
+= 2)
12339 if ((!df_regs_ever_live_p (reg
) || call_used_regs
[reg
])
12340 && (!df_regs_ever_live_p (reg
+ 1) || call_used_regs
[reg
+ 1]))
12342 if (start_reg
!= reg
)
12343 saved_size
+= vfp_emit_fstmd (start_reg
,
12344 (reg
- start_reg
) / 2);
12345 start_reg
= reg
+ 2;
12348 if (start_reg
!= reg
)
12349 saved_size
+= vfp_emit_fstmd (start_reg
,
12350 (reg
- start_reg
) / 2);
12356 /* Set the Thumb frame pointer from the stack pointer. */
12359 thumb_set_frame_pointer (arm_stack_offsets
*offsets
)
12361 HOST_WIDE_INT amount
;
12364 amount
= offsets
->outgoing_args
- offsets
->locals_base
;
12366 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
12367 stack_pointer_rtx
, GEN_INT (amount
)));
12370 emit_insn (gen_movsi (hard_frame_pointer_rtx
, GEN_INT (amount
)));
12371 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12372 expects the first two operands to be the same. */
12375 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
12377 hard_frame_pointer_rtx
));
12381 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
12382 hard_frame_pointer_rtx
,
12383 stack_pointer_rtx
));
12385 dwarf
= gen_rtx_SET (VOIDmode
, hard_frame_pointer_rtx
,
12386 plus_constant (stack_pointer_rtx
, amount
));
12387 RTX_FRAME_RELATED_P (dwarf
) = 1;
12388 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, dwarf
,
12392 RTX_FRAME_RELATED_P (insn
) = 1;
12395 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12398 arm_expand_prologue (void)
12403 unsigned long live_regs_mask
;
12404 unsigned long func_type
;
12406 int saved_pretend_args
= 0;
12407 int saved_regs
= 0;
12408 unsigned HOST_WIDE_INT args_to_push
;
12409 arm_stack_offsets
*offsets
;
12411 func_type
= arm_current_func_type ();
12413 /* Naked functions don't have prologues. */
12414 if (IS_NAKED (func_type
))
12417 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12418 args_to_push
= crtl
->args
.pretend_args_size
;
12420 /* Compute which register we will have to save onto the stack. */
12421 offsets
= arm_get_frame_offsets ();
12422 live_regs_mask
= offsets
->saved_regs_mask
;
12424 ip_rtx
= gen_rtx_REG (SImode
, IP_REGNUM
);
12426 if (IS_STACKALIGN (func_type
))
12431 /* Handle a word-aligned stack pointer. We generate the following:
12436 <save and restore r0 in normal prologue/epilogue>
12440 The unwinder doesn't need to know about the stack realignment.
12441 Just tell it we saved SP in r0. */
12442 gcc_assert (TARGET_THUMB2
&& !arm_arch_notm
&& args_to_push
== 0);
12444 r0
= gen_rtx_REG (SImode
, 0);
12445 r1
= gen_rtx_REG (SImode
, 1);
12446 dwarf
= gen_rtx_UNSPEC (SImode
, NULL_RTVEC
, UNSPEC_STACK_ALIGN
);
12447 dwarf
= gen_rtx_SET (VOIDmode
, r0
, dwarf
);
12448 insn
= gen_movsi (r0
, stack_pointer_rtx
);
12449 RTX_FRAME_RELATED_P (insn
) = 1;
12450 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
12451 dwarf
, REG_NOTES (insn
));
12453 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (~(HOST_WIDE_INT
)7)));
12454 emit_insn (gen_movsi (stack_pointer_rtx
, r1
));
12457 /* For APCS frames, if IP register is clobbered
12458 when creating frame, save that register in a special
12460 if (TARGET_APCS_FRAME
&& frame_pointer_needed
&& TARGET_ARM
)
12462 if (IS_INTERRUPT (func_type
))
12464 /* Interrupt functions must not corrupt any registers.
12465 Creating a frame pointer however, corrupts the IP
12466 register, so we must push it first. */
12467 insn
= emit_multi_reg_push (1 << IP_REGNUM
);
12469 /* Do not set RTX_FRAME_RELATED_P on this insn.
12470 The dwarf stack unwinding code only wants to see one
12471 stack decrement per function, and this is not it. If
12472 this instruction is labeled as being part of the frame
12473 creation sequence then dwarf2out_frame_debug_expr will
12474 die when it encounters the assignment of IP to FP
12475 later on, since the use of SP here establishes SP as
12476 the CFA register and not IP.
12478 Anyway this instruction is not really part of the stack
12479 frame creation although it is part of the prologue. */
12481 else if (IS_NESTED (func_type
))
12483 /* The Static chain register is the same as the IP register
12484 used as a scratch register during stack frame creation.
12485 To get around this need to find somewhere to store IP
12486 whilst the frame is being created. We try the following
12489 1. The last argument register.
12490 2. A slot on the stack above the frame. (This only
12491 works if the function is not a varargs function).
12492 3. Register r3, after pushing the argument registers
12495 Note - we only need to tell the dwarf2 backend about the SP
12496 adjustment in the second variant; the static chain register
12497 doesn't need to be unwound, as it doesn't contain a value
12498 inherited from the caller. */
12500 if (df_regs_ever_live_p (3) == false)
12501 insn
= emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
12502 else if (args_to_push
== 0)
12506 insn
= gen_rtx_PRE_DEC (SImode
, stack_pointer_rtx
);
12507 insn
= emit_set_insn (gen_frame_mem (SImode
, insn
), ip_rtx
);
12510 /* Just tell the dwarf backend that we adjusted SP. */
12511 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
12512 plus_constant (stack_pointer_rtx
,
12514 RTX_FRAME_RELATED_P (insn
) = 1;
12515 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
12516 dwarf
, REG_NOTES (insn
));
12520 /* Store the args on the stack. */
12521 if (cfun
->machine
->uses_anonymous_args
)
12522 insn
= emit_multi_reg_push
12523 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
12526 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
12527 GEN_INT (- args_to_push
)));
12529 RTX_FRAME_RELATED_P (insn
) = 1;
12531 saved_pretend_args
= 1;
12532 fp_offset
= args_to_push
;
12535 /* Now reuse r3 to preserve IP. */
12536 emit_set_insn (gen_rtx_REG (SImode
, 3), ip_rtx
);
12540 insn
= emit_set_insn (ip_rtx
,
12541 plus_constant (stack_pointer_rtx
, fp_offset
));
12542 RTX_FRAME_RELATED_P (insn
) = 1;
12547 /* Push the argument registers, or reserve space for them. */
12548 if (cfun
->machine
->uses_anonymous_args
)
12549 insn
= emit_multi_reg_push
12550 ((0xf0 >> (args_to_push
/ 4)) & 0xf);
12553 (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
12554 GEN_INT (- args_to_push
)));
12555 RTX_FRAME_RELATED_P (insn
) = 1;
12558 /* If this is an interrupt service routine, and the link register
12559 is going to be pushed, and we're not generating extra
12560 push of IP (needed when frame is needed and frame layout if apcs),
12561 subtracting four from LR now will mean that the function return
12562 can be done with a single instruction. */
12563 if ((func_type
== ARM_FT_ISR
|| func_type
== ARM_FT_FIQ
)
12564 && (live_regs_mask
& (1 << LR_REGNUM
)) != 0
12565 && !(frame_pointer_needed
&& TARGET_APCS_FRAME
)
12568 rtx lr
= gen_rtx_REG (SImode
, LR_REGNUM
);
12570 emit_set_insn (lr
, plus_constant (lr
, -4));
12573 if (live_regs_mask
)
12575 saved_regs
+= bit_count (live_regs_mask
) * 4;
12576 if (optimize_size
&& !frame_pointer_needed
12577 && saved_regs
== offsets
->saved_regs
- offsets
->saved_args
)
12579 /* If no coprocessor registers are being pushed and we don't have
12580 to worry about a frame pointer then push extra registers to
12581 create the stack frame. This is done is a way that does not
12582 alter the frame layout, so is independent of the epilogue. */
12586 while (n
< 8 && (live_regs_mask
& (1 << n
)) == 0)
12588 frame
= offsets
->outgoing_args
- (offsets
->saved_args
+ saved_regs
);
12589 if (frame
&& n
* 4 >= frame
)
12592 live_regs_mask
|= (1 << n
) - 1;
12593 saved_regs
+= frame
;
12596 insn
= emit_multi_reg_push (live_regs_mask
);
12597 RTX_FRAME_RELATED_P (insn
) = 1;
12600 if (! IS_VOLATILE (func_type
))
12601 saved_regs
+= arm_save_coproc_regs ();
12603 if (frame_pointer_needed
&& TARGET_ARM
)
12605 /* Create the new frame pointer. */
12606 if (TARGET_APCS_FRAME
)
12608 insn
= GEN_INT (-(4 + args_to_push
+ fp_offset
));
12609 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
, ip_rtx
, insn
));
12610 RTX_FRAME_RELATED_P (insn
) = 1;
12612 if (IS_NESTED (func_type
))
12614 /* Recover the static chain register. */
12615 if (!df_regs_ever_live_p (3)
12616 || saved_pretend_args
)
12617 insn
= gen_rtx_REG (SImode
, 3);
12618 else /* if (crtl->args.pretend_args_size == 0) */
12620 insn
= plus_constant (hard_frame_pointer_rtx
, 4);
12621 insn
= gen_frame_mem (SImode
, insn
);
12623 emit_set_insn (ip_rtx
, insn
);
12624 /* Add a USE to stop propagate_one_insn() from barfing. */
12625 emit_insn (gen_prologue_use (ip_rtx
));
12630 insn
= GEN_INT (saved_regs
- 4);
12631 insn
= emit_insn (gen_addsi3 (hard_frame_pointer_rtx
,
12632 stack_pointer_rtx
, insn
));
12633 RTX_FRAME_RELATED_P (insn
) = 1;
12637 if (offsets
->outgoing_args
!= offsets
->saved_args
+ saved_regs
)
12639 /* This add can produce multiple insns for a large constant, so we
12640 need to get tricky. */
12641 rtx last
= get_last_insn ();
12643 amount
= GEN_INT (offsets
->saved_args
+ saved_regs
12644 - offsets
->outgoing_args
);
12646 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
12650 last
= last
? NEXT_INSN (last
) : get_insns ();
12651 RTX_FRAME_RELATED_P (last
) = 1;
12653 while (last
!= insn
);
12655 /* If the frame pointer is needed, emit a special barrier that
12656 will prevent the scheduler from moving stores to the frame
12657 before the stack adjustment. */
12658 if (frame_pointer_needed
)
12659 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
12660 hard_frame_pointer_rtx
));
12664 if (frame_pointer_needed
&& TARGET_THUMB2
)
12665 thumb_set_frame_pointer (offsets
);
12667 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
12669 unsigned long mask
;
12671 mask
= live_regs_mask
;
12672 mask
&= THUMB2_WORK_REGS
;
12673 if (!IS_NESTED (func_type
))
12674 mask
|= (1 << IP_REGNUM
);
12675 arm_load_pic_register (mask
);
12678 /* If we are profiling, make sure no instructions are scheduled before
12679 the call to mcount. Similarly if the user has requested no
12680 scheduling in the prolog. Similarly if we want non-call exceptions
12681 using the EABI unwinder, to prevent faulting instructions from being
12682 swapped with a stack adjustment. */
12683 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
12684 || (ARM_EABI_UNWIND_TABLES
&& flag_non_call_exceptions
))
12685 emit_insn (gen_blockage ());
12687 /* If the link register is being kept alive, with the return address in it,
12688 then make sure that it does not get reused by the ce2 pass. */
12689 if ((live_regs_mask
& (1 << LR_REGNUM
)) == 0)
12690 cfun
->machine
->lr_save_eliminated
= 1;
12693 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12695 arm_print_condition (FILE *stream
)
12697 if (arm_ccfsm_state
== 3 || arm_ccfsm_state
== 4)
12699 /* Branch conversion is not implemented for Thumb-2. */
12702 output_operand_lossage ("predicated Thumb instruction");
12705 if (current_insn_predicate
!= NULL
)
12707 output_operand_lossage
12708 ("predicated instruction in conditional sequence");
12712 fputs (arm_condition_codes
[arm_current_cc
], stream
);
12714 else if (current_insn_predicate
)
12716 enum arm_cond_code code
;
12720 output_operand_lossage ("predicated Thumb instruction");
12724 code
= get_arm_condition_code (current_insn_predicate
);
12725 fputs (arm_condition_codes
[code
], stream
);
12730 /* If CODE is 'd', then the X is a condition operand and the instruction
12731 should only be executed if the condition is true.
12732 if CODE is 'D', then the X is a condition operand and the instruction
12733 should only be executed if the condition is false: however, if the mode
12734 of the comparison is CCFPEmode, then always execute the instruction -- we
12735 do this because in these circumstances !GE does not necessarily imply LT;
12736 in these cases the instruction pattern will take care to make sure that
12737 an instruction containing %d will follow, thereby undoing the effects of
12738 doing this instruction unconditionally.
12739 If CODE is 'N' then X is a floating point operand that must be negated
12741 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12742 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12744 arm_print_operand (FILE *stream
, rtx x
, int code
)
12749 fputs (ASM_COMMENT_START
, stream
);
12753 fputs (user_label_prefix
, stream
);
12757 fputs (REGISTER_PREFIX
, stream
);
12761 arm_print_condition (stream
);
12765 /* Nothing in unified syntax, otherwise the current condition code. */
12766 if (!TARGET_UNIFIED_ASM
)
12767 arm_print_condition (stream
);
12771 /* The current condition code in unified syntax, otherwise nothing. */
12772 if (TARGET_UNIFIED_ASM
)
12773 arm_print_condition (stream
);
12777 /* The current condition code for a condition code setting instruction.
12778 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12779 if (TARGET_UNIFIED_ASM
)
12781 fputc('s', stream
);
12782 arm_print_condition (stream
);
12786 arm_print_condition (stream
);
12787 fputc('s', stream
);
12792 /* If the instruction is conditionally executed then print
12793 the current condition code, otherwise print 's'. */
12794 gcc_assert (TARGET_THUMB2
&& TARGET_UNIFIED_ASM
);
12795 if (current_insn_predicate
)
12796 arm_print_condition (stream
);
12798 fputc('s', stream
);
12801 /* %# is a "break" sequence. It doesn't output anything, but is used to
12802 separate e.g. operand numbers from following text, if that text consists
12803 of further digits which we don't want to be part of the operand
12811 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
12812 r
= REAL_VALUE_NEGATE (r
);
12813 fprintf (stream
, "%s", fp_const_from_val (&r
));
12817 /* An integer without a preceding # sign. */
12819 gcc_assert (GET_CODE (x
) == CONST_INT
);
12820 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
12824 if (GET_CODE (x
) == CONST_INT
)
12827 val
= ARM_SIGN_EXTEND (~INTVAL (x
));
12828 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, val
);
12832 putc ('~', stream
);
12833 output_addr_const (stream
, x
);
12838 /* The low 16 bits of an immediate constant. */
12839 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL(x
) & 0xffff);
12843 fprintf (stream
, "%s", arithmetic_instr (x
, 1));
12846 /* Truncate Cirrus shift counts. */
12848 if (GET_CODE (x
) == CONST_INT
)
12850 fprintf (stream
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 0x3f);
12853 arm_print_operand (stream
, x
, 0);
12857 fprintf (stream
, "%s", arithmetic_instr (x
, 0));
12865 if (!shift_operator (x
, SImode
))
12867 output_operand_lossage ("invalid shift operand");
12871 shift
= shift_op (x
, &val
);
12875 fprintf (stream
, ", %s ", shift
);
12877 arm_print_operand (stream
, XEXP (x
, 1), 0);
12879 fprintf (stream
, "#" HOST_WIDE_INT_PRINT_DEC
, val
);
12884 /* An explanation of the 'Q', 'R' and 'H' register operands:
12886 In a pair of registers containing a DI or DF value the 'Q'
12887 operand returns the register number of the register containing
12888 the least significant part of the value. The 'R' operand returns
12889 the register number of the register containing the most
12890 significant part of the value.
12892 The 'H' operand returns the higher of the two register numbers.
12893 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12894 same as the 'Q' operand, since the most significant part of the
12895 value is held in the lower number register. The reverse is true
12896 on systems where WORDS_BIG_ENDIAN is false.
12898 The purpose of these operands is to distinguish between cases
12899 where the endian-ness of the values is important (for example
12900 when they are added together), and cases where the endian-ness
12901 is irrelevant, but the order of register operations is important.
12902 For example when loading a value from memory into a register
12903 pair, the endian-ness does not matter. Provided that the value
12904 from the lower memory address is put into the lower numbered
12905 register, and the value from the higher address is put into the
12906 higher numbered register, the load will work regardless of whether
12907 the value being loaded is big-wordian or little-wordian. The
12908 order of the two register loads can matter however, if the address
12909 of the memory location is actually held in one of the registers
12910 being overwritten by the load. */
12912 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
12914 output_operand_lossage ("invalid operand for code '%c'", code
);
12918 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 1 : 0));
12922 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
12924 output_operand_lossage ("invalid operand for code '%c'", code
);
12928 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 0 : 1));
12932 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
12934 output_operand_lossage ("invalid operand for code '%c'", code
);
12938 asm_fprintf (stream
, "%r", REGNO (x
) + 1);
12942 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
12944 output_operand_lossage ("invalid operand for code '%c'", code
);
12948 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 3 : 2));
12952 if (GET_CODE (x
) != REG
|| REGNO (x
) > LAST_ARM_REGNUM
)
12954 output_operand_lossage ("invalid operand for code '%c'", code
);
12958 asm_fprintf (stream
, "%r", REGNO (x
) + (WORDS_BIG_ENDIAN
? 2 : 3));
12962 asm_fprintf (stream
, "%r",
12963 GET_CODE (XEXP (x
, 0)) == REG
12964 ? REGNO (XEXP (x
, 0)) : REGNO (XEXP (XEXP (x
, 0), 0)));
12968 asm_fprintf (stream
, "{%r-%r}",
12970 REGNO (x
) + ARM_NUM_REGS (GET_MODE (x
)) - 1);
12973 /* Like 'M', but writing doubleword vector registers, for use by Neon
12977 int regno
= (REGNO (x
) - FIRST_VFP_REGNUM
) / 2;
12978 int numregs
= ARM_NUM_REGS (GET_MODE (x
)) / 2;
12980 asm_fprintf (stream
, "{d%d}", regno
);
12982 asm_fprintf (stream
, "{d%d-d%d}", regno
, regno
+ numregs
- 1);
12987 /* CONST_TRUE_RTX means always -- that's the default. */
12988 if (x
== const_true_rtx
)
12991 if (!COMPARISON_P (x
))
12993 output_operand_lossage ("invalid operand for code '%c'", code
);
12997 fputs (arm_condition_codes
[get_arm_condition_code (x
)],
13002 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
13003 want to do that. */
13004 if (x
== const_true_rtx
)
13006 output_operand_lossage ("instruction never executed");
13009 if (!COMPARISON_P (x
))
13011 output_operand_lossage ("invalid operand for code '%c'", code
);
13015 fputs (arm_condition_codes
[ARM_INVERSE_CONDITION_CODE
13016 (get_arm_condition_code (x
))],
13020 /* Cirrus registers can be accessed in a variety of ways:
13021 single floating point (f)
13022 double floating point (d)
13024 64bit integer (dx). */
13025 case 'W': /* Cirrus register in F mode. */
13026 case 'X': /* Cirrus register in D mode. */
13027 case 'Y': /* Cirrus register in FX mode. */
13028 case 'Z': /* Cirrus register in DX mode. */
13029 gcc_assert (GET_CODE (x
) == REG
13030 && REGNO_REG_CLASS (REGNO (x
)) == CIRRUS_REGS
);
13032 fprintf (stream
, "mv%s%s",
13034 : code
== 'X' ? "d"
13035 : code
== 'Y' ? "fx" : "dx", reg_names
[REGNO (x
)] + 2);
13039 /* Print cirrus register in the mode specified by the register's mode. */
13042 int mode
= GET_MODE (x
);
13044 if (GET_CODE (x
) != REG
|| REGNO_REG_CLASS (REGNO (x
)) != CIRRUS_REGS
)
13046 output_operand_lossage ("invalid operand for code '%c'", code
);
13050 fprintf (stream
, "mv%s%s",
13051 mode
== DFmode
? "d"
13052 : mode
== SImode
? "fx"
13053 : mode
== DImode
? "dx"
13054 : "f", reg_names
[REGNO (x
)] + 2);
13060 if (GET_CODE (x
) != REG
13061 || REGNO (x
) < FIRST_IWMMXT_GR_REGNUM
13062 || REGNO (x
) > LAST_IWMMXT_GR_REGNUM
)
13063 /* Bad value for wCG register number. */
13065 output_operand_lossage ("invalid operand for code '%c'", code
);
13070 fprintf (stream
, "%d", REGNO (x
) - FIRST_IWMMXT_GR_REGNUM
);
13073 /* Print an iWMMXt control register name. */
13075 if (GET_CODE (x
) != CONST_INT
13077 || INTVAL (x
) >= 16)
13078 /* Bad value for wC register number. */
13080 output_operand_lossage ("invalid operand for code '%c'", code
);
13086 static const char * wc_reg_names
[16] =
13088 "wCID", "wCon", "wCSSF", "wCASF",
13089 "wC4", "wC5", "wC6", "wC7",
13090 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13091 "wC12", "wC13", "wC14", "wC15"
13094 fprintf (stream
, wc_reg_names
[INTVAL (x
)]);
13098 /* Print a VFP/Neon double precision or quad precision register name. */
13102 int mode
= GET_MODE (x
);
13103 int is_quad
= (code
== 'q');
13106 if (GET_MODE_SIZE (mode
) != (is_quad
? 16 : 8))
13108 output_operand_lossage ("invalid operand for code '%c'", code
);
13112 if (GET_CODE (x
) != REG
13113 || !IS_VFP_REGNUM (REGNO (x
)))
13115 output_operand_lossage ("invalid operand for code '%c'", code
);
13120 if ((is_quad
&& !NEON_REGNO_OK_FOR_QUAD (regno
))
13121 || (!is_quad
&& !VFP_REGNO_OK_FOR_DOUBLE (regno
)))
13123 output_operand_lossage ("invalid operand for code '%c'", code
);
13127 fprintf (stream
, "%c%d", is_quad
? 'q' : 'd',
13128 (regno
- FIRST_VFP_REGNUM
) >> (is_quad
? 2 : 1));
13132 /* These two codes print the low/high doubleword register of a Neon quad
13133 register, respectively. For pair-structure types, can also print
13134 low/high quadword registers. */
13138 int mode
= GET_MODE (x
);
13141 if ((GET_MODE_SIZE (mode
) != 16
13142 && GET_MODE_SIZE (mode
) != 32) || GET_CODE (x
) != REG
)
13144 output_operand_lossage ("invalid operand for code '%c'", code
);
13149 if (!NEON_REGNO_OK_FOR_QUAD (regno
))
13151 output_operand_lossage ("invalid operand for code '%c'", code
);
13155 if (GET_MODE_SIZE (mode
) == 16)
13156 fprintf (stream
, "d%d", ((regno
- FIRST_VFP_REGNUM
) >> 1)
13157 + (code
== 'f' ? 1 : 0));
13159 fprintf (stream
, "q%d", ((regno
- FIRST_VFP_REGNUM
) >> 2)
13160 + (code
== 'f' ? 1 : 0));
13164 /* Print a VFPv3 floating-point constant, represented as an integer
13168 int index
= vfp3_const_double_index (x
);
13169 gcc_assert (index
!= -1);
13170 fprintf (stream
, "%d", index
);
13174 /* Print bits representing opcode features for Neon.
13176 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13177 and polynomials as unsigned.
13179 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13181 Bit 2 is 1 for rounding functions, 0 otherwise. */
13183 /* Identify the type as 's', 'u', 'p' or 'f'. */
13186 HOST_WIDE_INT bits
= INTVAL (x
);
13187 fputc ("uspf"[bits
& 3], stream
);
13191 /* Likewise, but signed and unsigned integers are both 'i'. */
13194 HOST_WIDE_INT bits
= INTVAL (x
);
13195 fputc ("iipf"[bits
& 3], stream
);
13199 /* As for 'T', but emit 'u' instead of 'p'. */
13202 HOST_WIDE_INT bits
= INTVAL (x
);
13203 fputc ("usuf"[bits
& 3], stream
);
13207 /* Bit 2: rounding (vs none). */
13210 HOST_WIDE_INT bits
= INTVAL (x
);
13211 fputs ((bits
& 4) != 0 ? "r" : "", stream
);
13218 output_operand_lossage ("missing operand");
13222 switch (GET_CODE (x
))
13225 asm_fprintf (stream
, "%r", REGNO (x
));
13229 output_memory_reference_mode
= GET_MODE (x
);
13230 output_address (XEXP (x
, 0));
13237 real_to_decimal (fpstr
, CONST_DOUBLE_REAL_VALUE (x
),
13238 sizeof (fpstr
), 0, 1);
13239 fprintf (stream
, "#%s", fpstr
);
13242 fprintf (stream
, "#%s", fp_immediate_constant (x
));
13246 gcc_assert (GET_CODE (x
) != NEG
);
13247 fputc ('#', stream
);
13248 output_addr_const (stream
, x
);
13254 /* Target hook for assembling integer objects. The ARM version needs to
13255 handle word-sized values specially. */
13257 arm_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
13259 enum machine_mode mode
;
13261 if (size
== UNITS_PER_WORD
&& aligned_p
)
13263 fputs ("\t.word\t", asm_out_file
);
13264 output_addr_const (asm_out_file
, x
);
13266 /* Mark symbols as position independent. We only do this in the
13267 .text segment, not in the .data segment. */
13268 if (NEED_GOT_RELOC
&& flag_pic
&& making_const_table
&&
13269 (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
))
13271 /* See legitimize_pic_address for an explanation of the
13272 TARGET_VXWORKS_RTP check. */
13273 if (TARGET_VXWORKS_RTP
13274 || (GET_CODE (x
) == SYMBOL_REF
&& !SYMBOL_REF_LOCAL_P (x
)))
13275 fputs ("(GOT)", asm_out_file
);
13277 fputs ("(GOTOFF)", asm_out_file
);
13279 fputc ('\n', asm_out_file
);
13283 mode
= GET_MODE (x
);
13285 if (arm_vector_mode_supported_p (mode
))
13288 unsigned int invmask
= 0, parts_per_word
;
13290 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
13292 units
= CONST_VECTOR_NUNITS (x
);
13293 size
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
13295 /* For big-endian Neon vectors, we must permute the vector to the form
13296 which, when loaded by a VLDR or VLDM instruction, will give a vector
13297 with the elements in the right order. */
13298 if (TARGET_NEON
&& WORDS_BIG_ENDIAN
)
13300 parts_per_word
= UNITS_PER_WORD
/ size
;
13301 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13302 support those anywhere yet. */
13303 invmask
= (parts_per_word
== 0) ? 0 : (1 << (parts_per_word
- 1)) - 1;
13306 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
13307 for (i
= 0; i
< units
; i
++)
13309 rtx elt
= CONST_VECTOR_ELT (x
, i
^ invmask
);
13311 (elt
, size
, i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
, 1);
13314 for (i
= 0; i
< units
; i
++)
13316 rtx elt
= CONST_VECTOR_ELT (x
, i
);
13317 REAL_VALUE_TYPE rval
;
13319 REAL_VALUE_FROM_CONST_DOUBLE (rval
, elt
);
13322 (rval
, GET_MODE_INNER (mode
),
13323 i
== 0 ? BIGGEST_ALIGNMENT
: size
* BITS_PER_UNIT
);
13329 return default_assemble_integer (x
, size
, aligned_p
);
13333 arm_elf_asm_cdtor (rtx symbol
, int priority
, bool is_ctor
)
13337 if (!TARGET_AAPCS_BASED
)
13340 default_named_section_asm_out_constructor
13341 : default_named_section_asm_out_destructor
) (symbol
, priority
);
13345 /* Put these in the .init_array section, using a special relocation. */
13346 if (priority
!= DEFAULT_INIT_PRIORITY
)
13349 sprintf (buf
, "%s.%.5u",
13350 is_ctor
? ".init_array" : ".fini_array",
13352 s
= get_section (buf
, SECTION_WRITE
, NULL_TREE
);
13359 switch_to_section (s
);
13360 assemble_align (POINTER_SIZE
);
13361 fputs ("\t.word\t", asm_out_file
);
13362 output_addr_const (asm_out_file
, symbol
);
13363 fputs ("(target1)\n", asm_out_file
);
13366 /* Add a function to the list of static constructors. */
13369 arm_elf_asm_constructor (rtx symbol
, int priority
)
13371 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/true);
13374 /* Add a function to the list of static destructors. */
13377 arm_elf_asm_destructor (rtx symbol
, int priority
)
13379 arm_elf_asm_cdtor (symbol
, priority
, /*is_ctor=*/false);
/* A finite state machine takes care of noticing whether or not instructions
   can be conditionally executed, and thus decrease execution time and code
   size by deleting branch instructions.  The fsm is controlled by
   final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE.  */

/* The state of the fsm controlling condition codes are:
   0: normal, do nothing special
   1: make ASM_OUTPUT_OPCODE not output this instruction
   2: make ASM_OUTPUT_OPCODE not output this instruction
   3: make instructions conditional
   4: make instructions conditional

   State transitions (state->state by whom under condition):
   0 -> 1 final_prescan_insn if the `target' is a label
   0 -> 2 final_prescan_insn if the `target' is an unconditional branch
   1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
   2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
   3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
          (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
   4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
          (the target insn is arm_target_insn).

   If the jump clobbers the conditions then we use states 2 and 4.

   A similar thing can be done with conditional return insns.

   XXX In case the `target' is an unconditional branch, this conditionalising
   of the instructions always reduces code size, but not always execution
   time.  But then, I want to reduce the code size to somewhere near what
   /bin/cc produces.  */

/* In addition to this, state is maintained for Thumb-2 COND_EXEC
   instructions.  When a COND_EXEC instruction is seen the subsequent
   instructions are scanned so that multiple conditional instructions can be
   combined into a single IT block.  arm_condexec_count and arm_condexec_mask
   specify the length and true/false mask for the IT block.  These will be
   decremented/zeroed by arm_asm_output_opcode as the insns are output.  */
13420 /* Returns the index of the ARM condition code string in
13421 `arm_condition_codes'. COMPARISON should be an rtx like
13422 `(eq (...) (...))'. */
13423 static enum arm_cond_code
13424 get_arm_condition_code (rtx comparison
)
13426 enum machine_mode mode
= GET_MODE (XEXP (comparison
, 0));
13428 enum rtx_code comp_code
= GET_CODE (comparison
);
13430 if (GET_MODE_CLASS (mode
) != MODE_CC
)
13431 mode
= SELECT_CC_MODE (comp_code
, XEXP (comparison
, 0),
13432 XEXP (comparison
, 1));
13436 case CC_DNEmode
: code
= ARM_NE
; goto dominance
;
13437 case CC_DEQmode
: code
= ARM_EQ
; goto dominance
;
13438 case CC_DGEmode
: code
= ARM_GE
; goto dominance
;
13439 case CC_DGTmode
: code
= ARM_GT
; goto dominance
;
13440 case CC_DLEmode
: code
= ARM_LE
; goto dominance
;
13441 case CC_DLTmode
: code
= ARM_LT
; goto dominance
;
13442 case CC_DGEUmode
: code
= ARM_CS
; goto dominance
;
13443 case CC_DGTUmode
: code
= ARM_HI
; goto dominance
;
13444 case CC_DLEUmode
: code
= ARM_LS
; goto dominance
;
13445 case CC_DLTUmode
: code
= ARM_CC
;
13448 gcc_assert (comp_code
== EQ
|| comp_code
== NE
);
13450 if (comp_code
== EQ
)
13451 return ARM_INVERSE_CONDITION_CODE (code
);
13457 case NE
: return ARM_NE
;
13458 case EQ
: return ARM_EQ
;
13459 case GE
: return ARM_PL
;
13460 case LT
: return ARM_MI
;
13461 default: gcc_unreachable ();
13467 case NE
: return ARM_NE
;
13468 case EQ
: return ARM_EQ
;
13469 default: gcc_unreachable ();
13475 case NE
: return ARM_MI
;
13476 case EQ
: return ARM_PL
;
13477 default: gcc_unreachable ();
13482 /* These encodings assume that AC=1 in the FPA system control
13483 byte. This allows us to handle all cases except UNEQ and
13487 case GE
: return ARM_GE
;
13488 case GT
: return ARM_GT
;
13489 case LE
: return ARM_LS
;
13490 case LT
: return ARM_MI
;
13491 case NE
: return ARM_NE
;
13492 case EQ
: return ARM_EQ
;
13493 case ORDERED
: return ARM_VC
;
13494 case UNORDERED
: return ARM_VS
;
13495 case UNLT
: return ARM_LT
;
13496 case UNLE
: return ARM_LE
;
13497 case UNGT
: return ARM_HI
;
13498 case UNGE
: return ARM_PL
;
13499 /* UNEQ and LTGT do not have a representation. */
13500 case UNEQ
: /* Fall through. */
13501 case LTGT
: /* Fall through. */
13502 default: gcc_unreachable ();
13508 case NE
: return ARM_NE
;
13509 case EQ
: return ARM_EQ
;
13510 case GE
: return ARM_LE
;
13511 case GT
: return ARM_LT
;
13512 case LE
: return ARM_GE
;
13513 case LT
: return ARM_GT
;
13514 case GEU
: return ARM_LS
;
13515 case GTU
: return ARM_CC
;
13516 case LEU
: return ARM_CS
;
13517 case LTU
: return ARM_HI
;
13518 default: gcc_unreachable ();
13524 case LTU
: return ARM_CS
;
13525 case GEU
: return ARM_CC
;
13526 default: gcc_unreachable ();
13532 case NE
: return ARM_NE
;
13533 case EQ
: return ARM_EQ
;
13534 case GE
: return ARM_GE
;
13535 case GT
: return ARM_GT
;
13536 case LE
: return ARM_LE
;
13537 case LT
: return ARM_LT
;
13538 case GEU
: return ARM_CS
;
13539 case GTU
: return ARM_HI
;
13540 case LEU
: return ARM_LS
;
13541 case LTU
: return ARM_CC
;
13542 default: gcc_unreachable ();
13545 default: gcc_unreachable ();
13549 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13552 thumb2_final_prescan_insn (rtx insn
)
13554 rtx first_insn
= insn
;
13555 rtx body
= PATTERN (insn
);
13557 enum arm_cond_code code
;
13561 /* Remove the previous insn from the count of insns to be output. */
13562 if (arm_condexec_count
)
13563 arm_condexec_count
--;
13565 /* Nothing to do if we are already inside a conditional block. */
13566 if (arm_condexec_count
)
13569 if (GET_CODE (body
) != COND_EXEC
)
13572 /* Conditional jumps are implemented directly. */
13573 if (GET_CODE (insn
) == JUMP_INSN
)
13576 predicate
= COND_EXEC_TEST (body
);
13577 arm_current_cc
= get_arm_condition_code (predicate
);
13579 n
= get_attr_ce_count (insn
);
13580 arm_condexec_count
= 1;
13581 arm_condexec_mask
= (1 << n
) - 1;
13582 arm_condexec_masklen
= n
;
13583 /* See if subsequent instructions can be combined into the same block. */
13586 insn
= next_nonnote_insn (insn
);
13588 /* Jumping into the middle of an IT block is illegal, so a label or
13589 barrier terminates the block. */
13590 if (GET_CODE (insn
) != INSN
&& GET_CODE(insn
) != JUMP_INSN
)
13593 body
= PATTERN (insn
);
13594 /* USE and CLOBBER aren't really insns, so just skip them. */
13595 if (GET_CODE (body
) == USE
13596 || GET_CODE (body
) == CLOBBER
)
13599 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13600 if (GET_CODE (body
) != COND_EXEC
)
13602 /* Allow up to 4 conditionally executed instructions in a block. */
13603 n
= get_attr_ce_count (insn
);
13604 if (arm_condexec_masklen
+ n
> 4)
13607 predicate
= COND_EXEC_TEST (body
);
13608 code
= get_arm_condition_code (predicate
);
13609 mask
= (1 << n
) - 1;
13610 if (arm_current_cc
== code
)
13611 arm_condexec_mask
|= (mask
<< arm_condexec_masklen
);
13612 else if (arm_current_cc
!= ARM_INVERSE_CONDITION_CODE(code
))
13615 arm_condexec_count
++;
13616 arm_condexec_masklen
+= n
;
13618 /* A jump must be the last instruction in a conditional block. */
13619 if (GET_CODE(insn
) == JUMP_INSN
)
13622 /* Restore recog_data (getting the attributes of other insns can
13623 destroy this array, but final.c assumes that it remains intact
13624 across this call). */
13625 extract_constrain_insn_cached (first_insn
);
13629 arm_final_prescan_insn (rtx insn
)
13631 /* BODY will hold the body of INSN. */
13632 rtx body
= PATTERN (insn
);
13634 /* This will be 1 if trying to repeat the trick, and things need to be
13635 reversed if it appears to fail. */
13638 /* JUMP_CLOBBERS will be one implies that the conditions if a branch is
13639 taken are clobbered, even if the rtl suggests otherwise. It also
13640 means that we have to grub around within the jump expression to find
13641 out what the conditions are when the jump isn't taken. */
13642 int jump_clobbers
= 0;
13644 /* If we start with a return insn, we only succeed if we find another one. */
13645 int seeking_return
= 0;
13647 /* START_INSN will hold the insn from where we start looking. This is the
13648 first insn after the following code_label if REVERSE is true. */
13649 rtx start_insn
= insn
;
13651 /* If in state 4, check if the target branch is reached, in order to
13652 change back to state 0. */
13653 if (arm_ccfsm_state
== 4)
13655 if (insn
== arm_target_insn
)
13657 arm_target_insn
= NULL
;
13658 arm_ccfsm_state
= 0;
13663 /* If in state 3, it is possible to repeat the trick, if this insn is an
13664 unconditional branch to a label, and immediately following this branch
13665 is the previous target label which is only used once, and the label this
13666 branch jumps to is not too far off. */
13667 if (arm_ccfsm_state
== 3)
13669 if (simplejump_p (insn
))
13671 start_insn
= next_nonnote_insn (start_insn
);
13672 if (GET_CODE (start_insn
) == BARRIER
)
13674 /* XXX Isn't this always a barrier? */
13675 start_insn
= next_nonnote_insn (start_insn
);
13677 if (GET_CODE (start_insn
) == CODE_LABEL
13678 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
13679 && LABEL_NUSES (start_insn
) == 1)
13684 else if (GET_CODE (body
) == RETURN
)
13686 start_insn
= next_nonnote_insn (start_insn
);
13687 if (GET_CODE (start_insn
) == BARRIER
)
13688 start_insn
= next_nonnote_insn (start_insn
);
13689 if (GET_CODE (start_insn
) == CODE_LABEL
13690 && CODE_LABEL_NUMBER (start_insn
) == arm_target_label
13691 && LABEL_NUSES (start_insn
) == 1)
13694 seeking_return
= 1;
13703 gcc_assert (!arm_ccfsm_state
|| reverse
);
13704 if (GET_CODE (insn
) != JUMP_INSN
)
13707 /* This jump might be paralleled with a clobber of the condition codes
13708 the jump should always come first */
13709 if (GET_CODE (body
) == PARALLEL
&& XVECLEN (body
, 0) > 0)
13710 body
= XVECEXP (body
, 0, 0);
13713 || (GET_CODE (body
) == SET
&& GET_CODE (SET_DEST (body
)) == PC
13714 && GET_CODE (SET_SRC (body
)) == IF_THEN_ELSE
))
13717 int fail
= FALSE
, succeed
= FALSE
;
13718 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13719 int then_not_else
= TRUE
;
13720 rtx this_insn
= start_insn
, label
= 0;
13722 /* If the jump cannot be done with one instruction, we cannot
13723 conditionally execute the instruction in the inverse case. */
13724 if (get_attr_conds (insn
) == CONDS_JUMP_CLOB
)
13730 /* Register the insn jumped to. */
13733 if (!seeking_return
)
13734 label
= XEXP (SET_SRC (body
), 0);
13736 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == LABEL_REF
)
13737 label
= XEXP (XEXP (SET_SRC (body
), 1), 0);
13738 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == LABEL_REF
)
13740 label
= XEXP (XEXP (SET_SRC (body
), 2), 0);
13741 then_not_else
= FALSE
;
13743 else if (GET_CODE (XEXP (SET_SRC (body
), 1)) == RETURN
)
13744 seeking_return
= 1;
13745 else if (GET_CODE (XEXP (SET_SRC (body
), 2)) == RETURN
)
13747 seeking_return
= 1;
13748 then_not_else
= FALSE
;
13751 gcc_unreachable ();
13753 /* See how many insns this branch skips, and what kind of insns. If all
13754 insns are okay, and the label or unconditional branch to the same
13755 label is not too far away, succeed. */
13756 for (insns_skipped
= 0;
13757 !fail
&& !succeed
&& insns_skipped
++ < max_insns_skipped
;)
13761 this_insn
= next_nonnote_insn (this_insn
);
13765 switch (GET_CODE (this_insn
))
13768 /* Succeed if it is the target label, otherwise fail since
13769 control falls in from somewhere else. */
13770 if (this_insn
== label
)
13774 arm_ccfsm_state
= 2;
13775 this_insn
= next_nonnote_insn (this_insn
);
13778 arm_ccfsm_state
= 1;
13786 /* Succeed if the following insn is the target label.
13788 If return insns are used then the last insn in a function
13789 will be a barrier. */
13790 this_insn
= next_nonnote_insn (this_insn
);
13791 if (this_insn
&& this_insn
== label
)
13795 arm_ccfsm_state
= 2;
13796 this_insn
= next_nonnote_insn (this_insn
);
13799 arm_ccfsm_state
= 1;
13807 /* The AAPCS says that conditional calls should not be
13808 used since they make interworking inefficient (the
13809 linker can't transform BL<cond> into BLX). That's
13810 only a problem if the machine has BLX. */
13817 /* Succeed if the following insn is the target label, or
13818 if the following two insns are a barrier and the
13820 this_insn
= next_nonnote_insn (this_insn
);
13821 if (this_insn
&& GET_CODE (this_insn
) == BARRIER
)
13822 this_insn
= next_nonnote_insn (this_insn
);
13824 if (this_insn
&& this_insn
== label
13825 && insns_skipped
< max_insns_skipped
)
13829 arm_ccfsm_state
= 2;
13830 this_insn
= next_nonnote_insn (this_insn
);
13833 arm_ccfsm_state
= 1;
13841 /* If this is an unconditional branch to the same label, succeed.
13842 If it is to another label, do nothing. If it is conditional,
13844 /* XXX Probably, the tests for SET and the PC are
13847 scanbody
= PATTERN (this_insn
);
13848 if (GET_CODE (scanbody
) == SET
13849 && GET_CODE (SET_DEST (scanbody
)) == PC
)
13851 if (GET_CODE (SET_SRC (scanbody
)) == LABEL_REF
13852 && XEXP (SET_SRC (scanbody
), 0) == label
&& !reverse
)
13854 arm_ccfsm_state
= 2;
13857 else if (GET_CODE (SET_SRC (scanbody
)) == IF_THEN_ELSE
)
13860 /* Fail if a conditional return is undesirable (e.g. on a
13861 StrongARM), but still allow this if optimizing for size. */
13862 else if (GET_CODE (scanbody
) == RETURN
13863 && !use_return_insn (TRUE
, NULL
)
13866 else if (GET_CODE (scanbody
) == RETURN
13869 arm_ccfsm_state
= 2;
13872 else if (GET_CODE (scanbody
) == PARALLEL
)
13874 switch (get_attr_conds (this_insn
))
13884 fail
= TRUE
; /* Unrecognized jump (e.g. epilogue). */
13889 /* Instructions using or affecting the condition codes make it
13891 scanbody
= PATTERN (this_insn
);
13892 if (!(GET_CODE (scanbody
) == SET
13893 || GET_CODE (scanbody
) == PARALLEL
)
13894 || get_attr_conds (this_insn
) != CONDS_NOCOND
)
13897 /* A conditional cirrus instruction must be followed by
13898 a non Cirrus instruction. However, since we
13899 conditionalize instructions in this function and by
13900 the time we get here we can't add instructions
13901 (nops), because shorten_branches() has already been
13902 called, we will disable conditionalizing Cirrus
13903 instructions to be safe. */
13904 if (GET_CODE (scanbody
) != USE
13905 && GET_CODE (scanbody
) != CLOBBER
13906 && get_attr_cirrus (this_insn
) != CIRRUS_NOT
)
13916 if ((!seeking_return
) && (arm_ccfsm_state
== 1 || reverse
))
13917 arm_target_label
= CODE_LABEL_NUMBER (label
);
13920 gcc_assert (seeking_return
|| arm_ccfsm_state
== 2);
13922 while (this_insn
&& GET_CODE (PATTERN (this_insn
)) == USE
)
13924 this_insn
= next_nonnote_insn (this_insn
);
13925 gcc_assert (!this_insn
13926 || (GET_CODE (this_insn
) != BARRIER
13927 && GET_CODE (this_insn
) != CODE_LABEL
));
13931 /* Oh, dear! we ran off the end.. give up. */
13932 extract_constrain_insn_cached (insn
);
13933 arm_ccfsm_state
= 0;
13934 arm_target_insn
= NULL
;
13937 arm_target_insn
= this_insn
;
13941 gcc_assert (!reverse
);
13943 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body
),
13945 if (GET_CODE (XEXP (XEXP (SET_SRC (body
), 0), 0)) == AND
)
13946 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
13947 if (GET_CODE (XEXP (SET_SRC (body
), 0)) == NE
)
13948 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
13952 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13955 arm_current_cc
= get_arm_condition_code (XEXP (SET_SRC (body
),
13959 if (reverse
|| then_not_else
)
13960 arm_current_cc
= ARM_INVERSE_CONDITION_CODE (arm_current_cc
);
13963 /* Restore recog_data (getting the attributes of other insns can
13964 destroy this array, but final.c assumes that it remains intact
13965 across this call. */
13966 extract_constrain_insn_cached (insn
);
13970 /* Output IT instructions. */
13972 thumb2_asm_output_opcode (FILE * stream
)
13977 if (arm_condexec_mask
)
13979 for (n
= 0; n
< arm_condexec_masklen
; n
++)
13980 buff
[n
] = (arm_condexec_mask
& (1 << n
)) ? 't' : 'e';
13982 asm_fprintf(stream
, "i%s\t%s\n\t", buff
,
13983 arm_condition_codes
[arm_current_cc
]);
13984 arm_condexec_mask
= 0;
13988 /* Returns true if REGNO is a valid register
13989 for holding a quantity of type MODE. */
13991 arm_hard_regno_mode_ok (unsigned int regno
, enum machine_mode mode
)
13993 if (GET_MODE_CLASS (mode
) == MODE_CC
)
13994 return (regno
== CC_REGNUM
13995 || (TARGET_HARD_FLOAT
&& TARGET_VFP
13996 && regno
== VFPCC_REGNUM
));
13999 /* For the Thumb we only allow values bigger than SImode in
14000 registers 0 - 6, so that there is always a second low
14001 register available to hold the upper part of the value.
14002 We probably we ought to ensure that the register is the
14003 start of an even numbered register pair. */
14004 return (ARM_NUM_REGS (mode
) < 2) || (regno
< LAST_LO_REGNUM
);
14006 if (TARGET_HARD_FLOAT
&& TARGET_MAVERICK
14007 && IS_CIRRUS_REGNUM (regno
))
14008 /* We have outlawed SI values in Cirrus registers because they
14009 reside in the lower 32 bits, but SF values reside in the
14010 upper 32 bits. This causes gcc all sorts of grief. We can't
14011 even split the registers into pairs because Cirrus SI values
14012 get sign extended to 64bits-- aldyh. */
14013 return (GET_MODE_CLASS (mode
) == MODE_FLOAT
) || (mode
== DImode
);
14015 if (TARGET_HARD_FLOAT
&& TARGET_VFP
14016 && IS_VFP_REGNUM (regno
))
14018 if (mode
== SFmode
|| mode
== SImode
)
14019 return VFP_REGNO_OK_FOR_SINGLE (regno
);
14021 if (mode
== DFmode
)
14022 return VFP_REGNO_OK_FOR_DOUBLE (regno
);
14025 return (VALID_NEON_DREG_MODE (mode
) && VFP_REGNO_OK_FOR_DOUBLE (regno
))
14026 || (VALID_NEON_QREG_MODE (mode
)
14027 && NEON_REGNO_OK_FOR_QUAD (regno
))
14028 || (mode
== TImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 2))
14029 || (mode
== EImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 3))
14030 || (mode
== OImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 4))
14031 || (mode
== CImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 6))
14032 || (mode
== XImode
&& NEON_REGNO_OK_FOR_NREGS (regno
, 8));
14037 if (TARGET_REALLY_IWMMXT
)
14039 if (IS_IWMMXT_GR_REGNUM (regno
))
14040 return mode
== SImode
;
14042 if (IS_IWMMXT_REGNUM (regno
))
14043 return VALID_IWMMXT_REG_MODE (mode
);
14046 /* We allow any value to be stored in the general registers.
14047 Restrict doubleword quantities to even register pairs so that we can
14048 use ldrd. Do not allow Neon structure opaque modes in general registers;
14049 they would use too many. */
14050 if (regno
<= LAST_ARM_REGNUM
)
14051 return !(TARGET_LDRD
&& GET_MODE_SIZE (mode
) > 4 && (regno
& 1) != 0)
14052 && !VALID_NEON_STRUCT_MODE (mode
);
14054 if (regno
== FRAME_POINTER_REGNUM
14055 || regno
== ARG_POINTER_REGNUM
)
14056 /* We only allow integers in the fake hard registers. */
14057 return GET_MODE_CLASS (mode
) == MODE_INT
;
14059 /* The only registers left are the FPA registers
14060 which we only allow to hold FP values. */
14061 return (TARGET_HARD_FLOAT
&& TARGET_FPA
14062 && GET_MODE_CLASS (mode
) == MODE_FLOAT
14063 && regno
>= FIRST_FPA_REGNUM
14064 && regno
<= LAST_FPA_REGNUM
);
14067 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
14068 not used in arm mode. */
14070 arm_regno_class (int regno
)
14074 if (regno
== STACK_POINTER_REGNUM
)
14076 if (regno
== CC_REGNUM
)
14083 if (TARGET_THUMB2
&& regno
< 8)
14086 if ( regno
<= LAST_ARM_REGNUM
14087 || regno
== FRAME_POINTER_REGNUM
14088 || regno
== ARG_POINTER_REGNUM
)
14089 return TARGET_THUMB2
? HI_REGS
: GENERAL_REGS
;
14091 if (regno
== CC_REGNUM
|| regno
== VFPCC_REGNUM
)
14092 return TARGET_THUMB2
? CC_REG
: NO_REGS
;
14094 if (IS_CIRRUS_REGNUM (regno
))
14095 return CIRRUS_REGS
;
14097 if (IS_VFP_REGNUM (regno
))
14099 if (regno
<= D7_VFP_REGNUM
)
14100 return VFP_D0_D7_REGS
;
14101 else if (regno
<= LAST_LO_VFP_REGNUM
)
14102 return VFP_LO_REGS
;
14104 return VFP_HI_REGS
;
14107 if (IS_IWMMXT_REGNUM (regno
))
14108 return IWMMXT_REGS
;
14110 if (IS_IWMMXT_GR_REGNUM (regno
))
14111 return IWMMXT_GR_REGS
;
14116 /* Handle a special case when computing the offset
14117 of an argument from the frame pointer. */
14119 arm_debugger_arg_offset (int value
, rtx addr
)
14123 /* We are only interested if dbxout_parms() failed to compute the offset. */
14127 /* We can only cope with the case where the address is held in a register. */
14128 if (GET_CODE (addr
) != REG
)
14131 /* If we are using the frame pointer to point at the argument, then
14132 an offset of 0 is correct. */
14133 if (REGNO (addr
) == (unsigned) HARD_FRAME_POINTER_REGNUM
)
14136 /* If we are using the stack pointer to point at the
14137 argument, then an offset of 0 is correct. */
14138 /* ??? Check this is consistent with thumb2 frame layout. */
14139 if ((TARGET_THUMB
|| !frame_pointer_needed
)
14140 && REGNO (addr
) == SP_REGNUM
)
14143 /* Oh dear. The argument is pointed to by a register rather
14144 than being held in a register, or being stored at a known
14145 offset from the frame pointer. Since GDB only understands
14146 those two kinds of argument we must translate the address
14147 held in the register into an offset from the frame pointer.
14148 We do this by searching through the insns for the function
14149 looking to see where this register gets its value. If the
14150 register is initialized from the frame pointer plus an offset
14151 then we are in luck and we can continue, otherwise we give up.
14153 This code is exercised by producing debugging information
14154 for a function with arguments like this:
14156 double func (double a, double b, int c, double d) {return d;}
14158 Without this code the stab for parameter 'd' will be set to
14159 an offset of 0 from the frame pointer, rather than 8. */
14161 /* The if() statement says:
14163 If the insn is a normal instruction
14164 and if the insn is setting the value in a register
14165 and if the register being set is the register holding the address of the argument
14166 and if the address is computing by an addition
14167 that involves adding to a register
14168 which is the frame pointer
14173 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
14175 if ( GET_CODE (insn
) == INSN
14176 && GET_CODE (PATTERN (insn
)) == SET
14177 && REGNO (XEXP (PATTERN (insn
), 0)) == REGNO (addr
)
14178 && GET_CODE (XEXP (PATTERN (insn
), 1)) == PLUS
14179 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 0)) == REG
14180 && REGNO (XEXP (XEXP (PATTERN (insn
), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14181 && GET_CODE (XEXP (XEXP (PATTERN (insn
), 1), 1)) == CONST_INT
14184 value
= INTVAL (XEXP (XEXP (PATTERN (insn
), 1), 1));
14193 warning (0, "unable to compute real location of stacked parameter");
14194 value
= 8; /* XXX magic hack */
/* Register builtin NAME with signature TYPE and code CODE, but only when
   the current target's insn_flags include MASK.  Wrapped in do/while (0)
   so it behaves as a single statement.  */
#define def_mbuiltin(MASK, NAME, TYPE, CODE)			\
  do								\
    {								\
      if ((MASK) & insn_flags)					\
	add_builtin_function ((NAME), (TYPE), (CODE),		\
			      BUILT_IN_MD, NULL, NULL_TREE);	\
    }								\
  while (0)
14209 struct builtin_description
14211 const unsigned int mask
;
14212 const enum insn_code icode
;
14213 const char * const name
;
14214 const enum arm_builtins code
;
14215 const enum rtx_code comparison
;
14216 const unsigned int flag
;
14219 static const struct builtin_description bdesc_2arg
[] =
14221 #define IWMMXT_BUILTIN(code, string, builtin) \
14222 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14223 ARM_BUILTIN_##builtin, 0, 0 },
14225 IWMMXT_BUILTIN (addv8qi3
, "waddb", WADDB
)
14226 IWMMXT_BUILTIN (addv4hi3
, "waddh", WADDH
)
14227 IWMMXT_BUILTIN (addv2si3
, "waddw", WADDW
)
14228 IWMMXT_BUILTIN (subv8qi3
, "wsubb", WSUBB
)
14229 IWMMXT_BUILTIN (subv4hi3
, "wsubh", WSUBH
)
14230 IWMMXT_BUILTIN (subv2si3
, "wsubw", WSUBW
)
14231 IWMMXT_BUILTIN (ssaddv8qi3
, "waddbss", WADDSSB
)
14232 IWMMXT_BUILTIN (ssaddv4hi3
, "waddhss", WADDSSH
)
14233 IWMMXT_BUILTIN (ssaddv2si3
, "waddwss", WADDSSW
)
14234 IWMMXT_BUILTIN (sssubv8qi3
, "wsubbss", WSUBSSB
)
14235 IWMMXT_BUILTIN (sssubv4hi3
, "wsubhss", WSUBSSH
)
14236 IWMMXT_BUILTIN (sssubv2si3
, "wsubwss", WSUBSSW
)
14237 IWMMXT_BUILTIN (usaddv8qi3
, "waddbus", WADDUSB
)
14238 IWMMXT_BUILTIN (usaddv4hi3
, "waddhus", WADDUSH
)
14239 IWMMXT_BUILTIN (usaddv2si3
, "waddwus", WADDUSW
)
14240 IWMMXT_BUILTIN (ussubv8qi3
, "wsubbus", WSUBUSB
)
14241 IWMMXT_BUILTIN (ussubv4hi3
, "wsubhus", WSUBUSH
)
14242 IWMMXT_BUILTIN (ussubv2si3
, "wsubwus", WSUBUSW
)
14243 IWMMXT_BUILTIN (mulv4hi3
, "wmulul", WMULUL
)
14244 IWMMXT_BUILTIN (smulv4hi3_highpart
, "wmulsm", WMULSM
)
14245 IWMMXT_BUILTIN (umulv4hi3_highpart
, "wmulum", WMULUM
)
14246 IWMMXT_BUILTIN (eqv8qi3
, "wcmpeqb", WCMPEQB
)
14247 IWMMXT_BUILTIN (eqv4hi3
, "wcmpeqh", WCMPEQH
)
14248 IWMMXT_BUILTIN (eqv2si3
, "wcmpeqw", WCMPEQW
)
14249 IWMMXT_BUILTIN (gtuv8qi3
, "wcmpgtub", WCMPGTUB
)
14250 IWMMXT_BUILTIN (gtuv4hi3
, "wcmpgtuh", WCMPGTUH
)
14251 IWMMXT_BUILTIN (gtuv2si3
, "wcmpgtuw", WCMPGTUW
)
14252 IWMMXT_BUILTIN (gtv8qi3
, "wcmpgtsb", WCMPGTSB
)
14253 IWMMXT_BUILTIN (gtv4hi3
, "wcmpgtsh", WCMPGTSH
)
14254 IWMMXT_BUILTIN (gtv2si3
, "wcmpgtsw", WCMPGTSW
)
14255 IWMMXT_BUILTIN (umaxv8qi3
, "wmaxub", WMAXUB
)
14256 IWMMXT_BUILTIN (smaxv8qi3
, "wmaxsb", WMAXSB
)
14257 IWMMXT_BUILTIN (umaxv4hi3
, "wmaxuh", WMAXUH
)
14258 IWMMXT_BUILTIN (smaxv4hi3
, "wmaxsh", WMAXSH
)
14259 IWMMXT_BUILTIN (umaxv2si3
, "wmaxuw", WMAXUW
)
14260 IWMMXT_BUILTIN (smaxv2si3
, "wmaxsw", WMAXSW
)
14261 IWMMXT_BUILTIN (uminv8qi3
, "wminub", WMINUB
)
14262 IWMMXT_BUILTIN (sminv8qi3
, "wminsb", WMINSB
)
14263 IWMMXT_BUILTIN (uminv4hi3
, "wminuh", WMINUH
)
14264 IWMMXT_BUILTIN (sminv4hi3
, "wminsh", WMINSH
)
14265 IWMMXT_BUILTIN (uminv2si3
, "wminuw", WMINUW
)
14266 IWMMXT_BUILTIN (sminv2si3
, "wminsw", WMINSW
)
14267 IWMMXT_BUILTIN (iwmmxt_anddi3
, "wand", WAND
)
14268 IWMMXT_BUILTIN (iwmmxt_nanddi3
, "wandn", WANDN
)
14269 IWMMXT_BUILTIN (iwmmxt_iordi3
, "wor", WOR
)
14270 IWMMXT_BUILTIN (iwmmxt_xordi3
, "wxor", WXOR
)
14271 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3
, "wavg2b", WAVG2B
)
14272 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3
, "wavg2h", WAVG2H
)
14273 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3
, "wavg2br", WAVG2BR
)
14274 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3
, "wavg2hr", WAVG2HR
)
14275 IWMMXT_BUILTIN (iwmmxt_wunpckilb
, "wunpckilb", WUNPCKILB
)
14276 IWMMXT_BUILTIN (iwmmxt_wunpckilh
, "wunpckilh", WUNPCKILH
)
14277 IWMMXT_BUILTIN (iwmmxt_wunpckilw
, "wunpckilw", WUNPCKILW
)
14278 IWMMXT_BUILTIN (iwmmxt_wunpckihb
, "wunpckihb", WUNPCKIHB
)
14279 IWMMXT_BUILTIN (iwmmxt_wunpckihh
, "wunpckihh", WUNPCKIHH
)
14280 IWMMXT_BUILTIN (iwmmxt_wunpckihw
, "wunpckihw", WUNPCKIHW
)
14281 IWMMXT_BUILTIN (iwmmxt_wmadds
, "wmadds", WMADDS
)
14282 IWMMXT_BUILTIN (iwmmxt_wmaddu
, "wmaddu", WMADDU
)
14284 #define IWMMXT_BUILTIN2(code, builtin) \
14285 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14287 IWMMXT_BUILTIN2 (iwmmxt_wpackhss
, WPACKHSS
)
14288 IWMMXT_BUILTIN2 (iwmmxt_wpackwss
, WPACKWSS
)
14289 IWMMXT_BUILTIN2 (iwmmxt_wpackdss
, WPACKDSS
)
14290 IWMMXT_BUILTIN2 (iwmmxt_wpackhus
, WPACKHUS
)
14291 IWMMXT_BUILTIN2 (iwmmxt_wpackwus
, WPACKWUS
)
14292 IWMMXT_BUILTIN2 (iwmmxt_wpackdus
, WPACKDUS
)
14293 IWMMXT_BUILTIN2 (ashlv4hi3_di
, WSLLH
)
14294 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt
, WSLLHI
)
14295 IWMMXT_BUILTIN2 (ashlv2si3_di
, WSLLW
)
14296 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt
, WSLLWI
)
14297 IWMMXT_BUILTIN2 (ashldi3_di
, WSLLD
)
14298 IWMMXT_BUILTIN2 (ashldi3_iwmmxt
, WSLLDI
)
14299 IWMMXT_BUILTIN2 (lshrv4hi3_di
, WSRLH
)
14300 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt
, WSRLHI
)
14301 IWMMXT_BUILTIN2 (lshrv2si3_di
, WSRLW
)
14302 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt
, WSRLWI
)
14303 IWMMXT_BUILTIN2 (lshrdi3_di
, WSRLD
)
14304 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt
, WSRLDI
)
14305 IWMMXT_BUILTIN2 (ashrv4hi3_di
, WSRAH
)
14306 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt
, WSRAHI
)
14307 IWMMXT_BUILTIN2 (ashrv2si3_di
, WSRAW
)
14308 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt
, WSRAWI
)
14309 IWMMXT_BUILTIN2 (ashrdi3_di
, WSRAD
)
14310 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt
, WSRADI
)
14311 IWMMXT_BUILTIN2 (rorv4hi3_di
, WRORH
)
14312 IWMMXT_BUILTIN2 (rorv4hi3
, WRORHI
)
14313 IWMMXT_BUILTIN2 (rorv2si3_di
, WRORW
)
14314 IWMMXT_BUILTIN2 (rorv2si3
, WRORWI
)
14315 IWMMXT_BUILTIN2 (rordi3_di
, WRORD
)
14316 IWMMXT_BUILTIN2 (rordi3
, WRORDI
)
14317 IWMMXT_BUILTIN2 (iwmmxt_wmacuz
, WMACUZ
)
14318 IWMMXT_BUILTIN2 (iwmmxt_wmacsz
, WMACSZ
)
14321 static const struct builtin_description bdesc_1arg
[] =
14323 IWMMXT_BUILTIN (iwmmxt_tmovmskb
, "tmovmskb", TMOVMSKB
)
14324 IWMMXT_BUILTIN (iwmmxt_tmovmskh
, "tmovmskh", TMOVMSKH
)
14325 IWMMXT_BUILTIN (iwmmxt_tmovmskw
, "tmovmskw", TMOVMSKW
)
14326 IWMMXT_BUILTIN (iwmmxt_waccb
, "waccb", WACCB
)
14327 IWMMXT_BUILTIN (iwmmxt_wacch
, "wacch", WACCH
)
14328 IWMMXT_BUILTIN (iwmmxt_waccw
, "waccw", WACCW
)
14329 IWMMXT_BUILTIN (iwmmxt_wunpckehub
, "wunpckehub", WUNPCKEHUB
)
14330 IWMMXT_BUILTIN (iwmmxt_wunpckehuh
, "wunpckehuh", WUNPCKEHUH
)
14331 IWMMXT_BUILTIN (iwmmxt_wunpckehuw
, "wunpckehuw", WUNPCKEHUW
)
14332 IWMMXT_BUILTIN (iwmmxt_wunpckehsb
, "wunpckehsb", WUNPCKEHSB
)
14333 IWMMXT_BUILTIN (iwmmxt_wunpckehsh
, "wunpckehsh", WUNPCKEHSH
)
14334 IWMMXT_BUILTIN (iwmmxt_wunpckehsw
, "wunpckehsw", WUNPCKEHSW
)
14335 IWMMXT_BUILTIN (iwmmxt_wunpckelub
, "wunpckelub", WUNPCKELUB
)
14336 IWMMXT_BUILTIN (iwmmxt_wunpckeluh
, "wunpckeluh", WUNPCKELUH
)
14337 IWMMXT_BUILTIN (iwmmxt_wunpckeluw
, "wunpckeluw", WUNPCKELUW
)
14338 IWMMXT_BUILTIN (iwmmxt_wunpckelsb
, "wunpckelsb", WUNPCKELSB
)
14339 IWMMXT_BUILTIN (iwmmxt_wunpckelsh
, "wunpckelsh", WUNPCKELSH
)
14340 IWMMXT_BUILTIN (iwmmxt_wunpckelsw
, "wunpckelsw", WUNPCKELSW
)
14343 /* Set up all the iWMMXt builtins. This is
14344 not called if TARGET_IWMMXT is zero. */
14347 arm_init_iwmmxt_builtins (void)
14349 const struct builtin_description
* d
;
14351 tree endlink
= void_list_node
;
14353 tree V2SI_type_node
= build_vector_type_for_mode (intSI_type_node
, V2SImode
);
14354 tree V4HI_type_node
= build_vector_type_for_mode (intHI_type_node
, V4HImode
);
14355 tree V8QI_type_node
= build_vector_type_for_mode (intQI_type_node
, V8QImode
);
14358 = build_function_type (integer_type_node
,
14359 tree_cons (NULL_TREE
, integer_type_node
, endlink
));
14360 tree v8qi_ftype_v8qi_v8qi_int
14361 = build_function_type (V8QI_type_node
,
14362 tree_cons (NULL_TREE
, V8QI_type_node
,
14363 tree_cons (NULL_TREE
, V8QI_type_node
,
14364 tree_cons (NULL_TREE
,
14367 tree v4hi_ftype_v4hi_int
14368 = build_function_type (V4HI_type_node
,
14369 tree_cons (NULL_TREE
, V4HI_type_node
,
14370 tree_cons (NULL_TREE
, integer_type_node
,
14372 tree v2si_ftype_v2si_int
14373 = build_function_type (V2SI_type_node
,
14374 tree_cons (NULL_TREE
, V2SI_type_node
,
14375 tree_cons (NULL_TREE
, integer_type_node
,
14377 tree v2si_ftype_di_di
14378 = build_function_type (V2SI_type_node
,
14379 tree_cons (NULL_TREE
, long_long_integer_type_node
,
14380 tree_cons (NULL_TREE
, long_long_integer_type_node
,
14382 tree di_ftype_di_int
14383 = build_function_type (long_long_integer_type_node
,
14384 tree_cons (NULL_TREE
, long_long_integer_type_node
,
14385 tree_cons (NULL_TREE
, integer_type_node
,
14387 tree di_ftype_di_int_int
14388 = build_function_type (long_long_integer_type_node
,
14389 tree_cons (NULL_TREE
, long_long_integer_type_node
,
14390 tree_cons (NULL_TREE
, integer_type_node
,
14391 tree_cons (NULL_TREE
,
14394 tree int_ftype_v8qi
14395 = build_function_type (integer_type_node
,
14396 tree_cons (NULL_TREE
, V8QI_type_node
,
14398 tree int_ftype_v4hi
14399 = build_function_type (integer_type_node
,
14400 tree_cons (NULL_TREE
, V4HI_type_node
,
14402 tree int_ftype_v2si
14403 = build_function_type (integer_type_node
,
14404 tree_cons (NULL_TREE
, V2SI_type_node
,
14406 tree int_ftype_v8qi_int
14407 = build_function_type (integer_type_node
,
14408 tree_cons (NULL_TREE
, V8QI_type_node
,
14409 tree_cons (NULL_TREE
, integer_type_node
,
14411 tree int_ftype_v4hi_int
14412 = build_function_type (integer_type_node
,
14413 tree_cons (NULL_TREE
, V4HI_type_node
,
14414 tree_cons (NULL_TREE
, integer_type_node
,
14416 tree int_ftype_v2si_int
14417 = build_function_type (integer_type_node
,
14418 tree_cons (NULL_TREE
, V2SI_type_node
,
14419 tree_cons (NULL_TREE
, integer_type_node
,
14421 tree v8qi_ftype_v8qi_int_int
14422 = build_function_type (V8QI_type_node
,
14423 tree_cons (NULL_TREE
, V8QI_type_node
,
14424 tree_cons (NULL_TREE
, integer_type_node
,
14425 tree_cons (NULL_TREE
,
14428 tree v4hi_ftype_v4hi_int_int
14429 = build_function_type (V4HI_type_node
,
14430 tree_cons (NULL_TREE
, V4HI_type_node
,
14431 tree_cons (NULL_TREE
, integer_type_node
,
14432 tree_cons (NULL_TREE
,
14435 tree v2si_ftype_v2si_int_int
14436 = build_function_type (V2SI_type_node
,
14437 tree_cons (NULL_TREE
, V2SI_type_node
,
14438 tree_cons (NULL_TREE
, integer_type_node
,
14439 tree_cons (NULL_TREE
,
14442 /* Miscellaneous. */
14443 tree v8qi_ftype_v4hi_v4hi
14444 = build_function_type (V8QI_type_node
,
14445 tree_cons (NULL_TREE
, V4HI_type_node
,
14446 tree_cons (NULL_TREE
, V4HI_type_node
,
14448 tree v4hi_ftype_v2si_v2si
14449 = build_function_type (V4HI_type_node
,
14450 tree_cons (NULL_TREE
, V2SI_type_node
,
14451 tree_cons (NULL_TREE
, V2SI_type_node
,
14453 tree v2si_ftype_v4hi_v4hi
14454 = build_function_type (V2SI_type_node
,
14455 tree_cons (NULL_TREE
, V4HI_type_node
,
14456 tree_cons (NULL_TREE
, V4HI_type_node
,
14458 tree v2si_ftype_v8qi_v8qi
14459 = build_function_type (V2SI_type_node
,
14460 tree_cons (NULL_TREE
, V8QI_type_node
,
14461 tree_cons (NULL_TREE
, V8QI_type_node
,
14463 tree v4hi_ftype_v4hi_di
14464 = build_function_type (V4HI_type_node
,
14465 tree_cons (NULL_TREE
, V4HI_type_node
,
14466 tree_cons (NULL_TREE
,
14467 long_long_integer_type_node
,
14469 tree v2si_ftype_v2si_di
14470 = build_function_type (V2SI_type_node
,
14471 tree_cons (NULL_TREE
, V2SI_type_node
,
14472 tree_cons (NULL_TREE
,
14473 long_long_integer_type_node
,
14475 tree void_ftype_int_int
14476 = build_function_type (void_type_node
,
14477 tree_cons (NULL_TREE
, integer_type_node
,
14478 tree_cons (NULL_TREE
, integer_type_node
,
14481 = build_function_type (long_long_unsigned_type_node
, endlink
);
14483 = build_function_type (long_long_integer_type_node
,
14484 tree_cons (NULL_TREE
, V8QI_type_node
,
14487 = build_function_type (long_long_integer_type_node
,
14488 tree_cons (NULL_TREE
, V4HI_type_node
,
14491 = build_function_type (long_long_integer_type_node
,
14492 tree_cons (NULL_TREE
, V2SI_type_node
,
14494 tree v2si_ftype_v4hi
14495 = build_function_type (V2SI_type_node
,
14496 tree_cons (NULL_TREE
, V4HI_type_node
,
14498 tree v4hi_ftype_v8qi
14499 = build_function_type (V4HI_type_node
,
14500 tree_cons (NULL_TREE
, V8QI_type_node
,
14503 tree di_ftype_di_v4hi_v4hi
14504 = build_function_type (long_long_unsigned_type_node
,
14505 tree_cons (NULL_TREE
,
14506 long_long_unsigned_type_node
,
14507 tree_cons (NULL_TREE
, V4HI_type_node
,
14508 tree_cons (NULL_TREE
,
14512 tree di_ftype_v4hi_v4hi
14513 = build_function_type (long_long_unsigned_type_node
,
14514 tree_cons (NULL_TREE
, V4HI_type_node
,
14515 tree_cons (NULL_TREE
, V4HI_type_node
,
14518 /* Normal vector binops. */
14519 tree v8qi_ftype_v8qi_v8qi
14520 = build_function_type (V8QI_type_node
,
14521 tree_cons (NULL_TREE
, V8QI_type_node
,
14522 tree_cons (NULL_TREE
, V8QI_type_node
,
14524 tree v4hi_ftype_v4hi_v4hi
14525 = build_function_type (V4HI_type_node
,
14526 tree_cons (NULL_TREE
, V4HI_type_node
,
14527 tree_cons (NULL_TREE
, V4HI_type_node
,
14529 tree v2si_ftype_v2si_v2si
14530 = build_function_type (V2SI_type_node
,
14531 tree_cons (NULL_TREE
, V2SI_type_node
,
14532 tree_cons (NULL_TREE
, V2SI_type_node
,
14534 tree di_ftype_di_di
14535 = build_function_type (long_long_unsigned_type_node
,
14536 tree_cons (NULL_TREE
, long_long_unsigned_type_node
,
14537 tree_cons (NULL_TREE
,
14538 long_long_unsigned_type_node
,
14541 /* Add all builtins that are more or less simple operations on two
14543 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
14545 /* Use one of the operands; the target can have a different mode for
14546 mask-generating compares. */
14547 enum machine_mode mode
;
14553 mode
= insn_data
[d
->icode
].operand
[1].mode
;
14558 type
= v8qi_ftype_v8qi_v8qi
;
14561 type
= v4hi_ftype_v4hi_v4hi
;
14564 type
= v2si_ftype_v2si_v2si
;
14567 type
= di_ftype_di_di
;
14571 gcc_unreachable ();
14574 def_mbuiltin (d
->mask
, d
->name
, type
, d
->code
);
14577 /* Add the remaining MMX insns with somewhat more complicated types. */
14578 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wzero", di_ftype_void
, ARM_BUILTIN_WZERO
);
14579 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_setwcx", void_ftype_int_int
, ARM_BUILTIN_SETWCX
);
14580 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_getwcx", int_ftype_int
, ARM_BUILTIN_GETWCX
);
14582 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WSLLH
);
14583 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllw", v2si_ftype_v2si_di
, ARM_BUILTIN_WSLLW
);
14584 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wslld", di_ftype_di_di
, ARM_BUILTIN_WSLLD
);
14585 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSLLHI
);
14586 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsllwi", v2si_ftype_v2si_int
, ARM_BUILTIN_WSLLWI
);
14587 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wslldi", di_ftype_di_int
, ARM_BUILTIN_WSLLDI
);
14589 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WSRLH
);
14590 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlw", v2si_ftype_v2si_di
, ARM_BUILTIN_WSRLW
);
14591 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrld", di_ftype_di_di
, ARM_BUILTIN_WSRLD
);
14592 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSRLHI
);
14593 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int
, ARM_BUILTIN_WSRLWI
);
14594 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrldi", di_ftype_di_int
, ARM_BUILTIN_WSRLDI
);
14596 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WSRAH
);
14597 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsraw", v2si_ftype_v2si_di
, ARM_BUILTIN_WSRAW
);
14598 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrad", di_ftype_di_di
, ARM_BUILTIN_WSRAD
);
14599 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSRAHI
);
14600 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsrawi", v2si_ftype_v2si_int
, ARM_BUILTIN_WSRAWI
);
14601 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsradi", di_ftype_di_int
, ARM_BUILTIN_WSRADI
);
14603 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di
, ARM_BUILTIN_WRORH
);
14604 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorw", v2si_ftype_v2si_di
, ARM_BUILTIN_WRORW
);
14605 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrord", di_ftype_di_di
, ARM_BUILTIN_WRORD
);
14606 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WRORHI
);
14607 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrorwi", v2si_ftype_v2si_int
, ARM_BUILTIN_WRORWI
);
14608 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wrordi", di_ftype_di_int
, ARM_BUILTIN_WRORDI
);
14610 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int
, ARM_BUILTIN_WSHUFH
);
14612 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi
, ARM_BUILTIN_WSADB
);
14613 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi
, ARM_BUILTIN_WSADH
);
14614 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi
, ARM_BUILTIN_WSADBZ
);
14615 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi
, ARM_BUILTIN_WSADHZ
);
14617 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmsb", int_ftype_v8qi_int
, ARM_BUILTIN_TEXTRMSB
);
14618 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmsh", int_ftype_v4hi_int
, ARM_BUILTIN_TEXTRMSH
);
14619 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmsw", int_ftype_v2si_int
, ARM_BUILTIN_TEXTRMSW
);
14620 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmub", int_ftype_v8qi_int
, ARM_BUILTIN_TEXTRMUB
);
14621 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmuh", int_ftype_v4hi_int
, ARM_BUILTIN_TEXTRMUH
);
14622 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_textrmuw", int_ftype_v2si_int
, ARM_BUILTIN_TEXTRMUW
);
14623 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int
, ARM_BUILTIN_TINSRB
);
14624 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int
, ARM_BUILTIN_TINSRH
);
14625 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int
, ARM_BUILTIN_TINSRW
);
14627 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_waccb", di_ftype_v8qi
, ARM_BUILTIN_WACCB
);
14628 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wacch", di_ftype_v4hi
, ARM_BUILTIN_WACCH
);
14629 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_waccw", di_ftype_v2si
, ARM_BUILTIN_WACCW
);
14631 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmovmskb", int_ftype_v8qi
, ARM_BUILTIN_TMOVMSKB
);
14632 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmovmskh", int_ftype_v4hi
, ARM_BUILTIN_TMOVMSKH
);
14633 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmovmskw", int_ftype_v2si
, ARM_BUILTIN_TMOVMSKW
);
14635 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi
, ARM_BUILTIN_WPACKHSS
);
14636 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi
, ARM_BUILTIN_WPACKHUS
);
14637 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si
, ARM_BUILTIN_WPACKWUS
);
14638 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si
, ARM_BUILTIN_WPACKWSS
);
14639 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackdus", v2si_ftype_di_di
, ARM_BUILTIN_WPACKDUS
);
14640 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wpackdss", v2si_ftype_di_di
, ARM_BUILTIN_WPACKDSS
);
14642 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKEHUB
);
14643 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKEHUH
);
14644 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehuw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKEHUW
);
14645 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKEHSB
);
14646 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKEHSH
);
14647 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckehsw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKEHSW
);
14648 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKELUB
);
14649 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKELUH
);
14650 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckeluw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKELUW
);
14651 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi
, ARM_BUILTIN_WUNPCKELSB
);
14652 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi
, ARM_BUILTIN_WUNPCKELSH
);
14653 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wunpckelsw", di_ftype_v2si
, ARM_BUILTIN_WUNPCKELSW
);
14655 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi
, ARM_BUILTIN_WMACS
);
14656 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi
, ARM_BUILTIN_WMACSZ
);
14657 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi
, ARM_BUILTIN_WMACU
);
14658 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi
, ARM_BUILTIN_WMACUZ
);
14660 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int
, ARM_BUILTIN_WALIGN
);
14661 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmia", di_ftype_di_int_int
, ARM_BUILTIN_TMIA
);
14662 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiaph", di_ftype_di_int_int
, ARM_BUILTIN_TMIAPH
);
14663 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiabb", di_ftype_di_int_int
, ARM_BUILTIN_TMIABB
);
14664 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiabt", di_ftype_di_int_int
, ARM_BUILTIN_TMIABT
);
14665 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiatb", di_ftype_di_int_int
, ARM_BUILTIN_TMIATB
);
14666 def_mbuiltin (FL_IWMMXT
, "__builtin_arm_tmiatt", di_ftype_di_int_int
, ARM_BUILTIN_TMIATT
);
14670 arm_init_tls_builtins (void)
14674 ftype
= build_function_type (ptr_type_node
, void_list_node
);
14675 decl
= add_builtin_function ("__builtin_thread_pointer", ftype
,
14676 ARM_BUILTIN_THREAD_POINTER
, BUILT_IN_MD
,
14678 TREE_NOTHROW (decl
) = 1;
14679 TREE_READONLY (decl
) = 1;
14696 } neon_builtin_type_bits
;
/* Map a vector-mode suffix onto its T_* flag bit in
   neon_builtin_type_bits; UP(X) does the token pasting.  */
#define v8qi_UP  T_V8QI
#define v4hi_UP  T_V4HI
#define v2si_UP  T_V2SI
#define v2sf_UP  T_V2SF
/* NOTE(review): the di_UP definition was dropped by the extraction and is
   restored here — the VAR* tables below use `di', so it must exist.  */
#define di_UP    T_DI
#define v16qi_UP T_V16QI
#define v8hi_UP  T_V8HI
#define v4si_UP  T_V4SI
#define v4sf_UP  T_V4SF
#define v2di_UP  T_V2DI

#define UP(X) X##_UP
14747 NEON_LOADSTRUCTLANE
,
14749 NEON_STORESTRUCTLANE
,
14758 const neon_itype itype
;
14759 const neon_builtin_type_bits bits
;
14760 const enum insn_code codes
[T_MAX
];
14761 const unsigned int num_vars
;
14762 unsigned int base_fcode
;
14763 } neon_builtin_datum
;
/* CF(N,X) names the insn code for NEON builtin N in mode variant X.  */
#define CF(N,X) CODE_FOR_neon_##N##X

/* VARn expands to the initializer fields of one neon_builtin_datum:
   name string, itype, mode bitmask, insn-code array, variant count and a
   zero base_fcode (filled in at init time).
   NOTE(review): two continuation lines (the VAR7 tail and the VAR8
   "| UP (H)" term) were dropped by the extraction and are restored from
   the obvious pattern.  */
#define VAR1(T, N, A) \
  #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
#define VAR2(T, N, A, B) \
  #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
#define VAR3(T, N, A, B, C) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C), \
  { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
#define VAR4(T, N, A, B, C, D) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
#define VAR5(T, N, A, B, C, D, E) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
#define VAR6(T, N, A, B, C, D, E, F) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
#define VAR7(T, N, A, B, C, D, E, F, G) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G) }, 7, 0
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H) }, 8, 0
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I) }, 9, 0
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
  #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
    | UP (H) | UP (I) | UP (J), \
  { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
    CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
/* The mode entries in the following table correspond to the "key" type of the
   instruction variant, i.e. equivalent to that which would be specified after
   the assembler mnemonic, which usually refers to the last vector operand.
   (Signed/unsigned/polynomial types are not differentiated between though, and
   are all mapped onto the same mode for a given element size.)  The modes
   listed per instruction should be the same as those defined for that
   instruction's pattern in neon.md.
   WARNING: Variants should be listed in the same increasing order as
   neon_builtin_type_bits.  */
14813 static neon_builtin_datum neon_builtin_data
[] =
14815 { VAR10 (BINOP
, vadd
,
14816 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14817 { VAR3 (BINOP
, vaddl
, v8qi
, v4hi
, v2si
) },
14818 { VAR3 (BINOP
, vaddw
, v8qi
, v4hi
, v2si
) },
14819 { VAR6 (BINOP
, vhadd
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14820 { VAR8 (BINOP
, vqadd
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14821 { VAR3 (BINOP
, vaddhn
, v8hi
, v4si
, v2di
) },
14822 { VAR8 (BINOP
, vmul
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14823 { VAR8 (TERNOP
, vmla
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14824 { VAR3 (TERNOP
, vmlal
, v8qi
, v4hi
, v2si
) },
14825 { VAR8 (TERNOP
, vmls
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14826 { VAR3 (TERNOP
, vmlsl
, v8qi
, v4hi
, v2si
) },
14827 { VAR4 (BINOP
, vqdmulh
, v4hi
, v2si
, v8hi
, v4si
) },
14828 { VAR2 (TERNOP
, vqdmlal
, v4hi
, v2si
) },
14829 { VAR2 (TERNOP
, vqdmlsl
, v4hi
, v2si
) },
14830 { VAR3 (BINOP
, vmull
, v8qi
, v4hi
, v2si
) },
14831 { VAR2 (SCALARMULL
, vmull_n
, v4hi
, v2si
) },
14832 { VAR2 (LANEMULL
, vmull_lane
, v4hi
, v2si
) },
14833 { VAR2 (SCALARMULL
, vqdmull_n
, v4hi
, v2si
) },
14834 { VAR2 (LANEMULL
, vqdmull_lane
, v4hi
, v2si
) },
14835 { VAR4 (SCALARMULH
, vqdmulh_n
, v4hi
, v2si
, v8hi
, v4si
) },
14836 { VAR4 (LANEMULH
, vqdmulh_lane
, v4hi
, v2si
, v8hi
, v4si
) },
14837 { VAR2 (BINOP
, vqdmull
, v4hi
, v2si
) },
14838 { VAR8 (BINOP
, vshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14839 { VAR8 (BINOP
, vqshl
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14840 { VAR8 (SHIFTIMM
, vshr_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14841 { VAR3 (SHIFTIMM
, vshrn_n
, v8hi
, v4si
, v2di
) },
14842 { VAR3 (SHIFTIMM
, vqshrn_n
, v8hi
, v4si
, v2di
) },
14843 { VAR3 (SHIFTIMM
, vqshrun_n
, v8hi
, v4si
, v2di
) },
14844 { VAR8 (SHIFTIMM
, vshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14845 { VAR8 (SHIFTIMM
, vqshl_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14846 { VAR8 (SHIFTIMM
, vqshlu_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14847 { VAR3 (SHIFTIMM
, vshll_n
, v8qi
, v4hi
, v2si
) },
14848 { VAR8 (SHIFTACC
, vsra_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14849 { VAR10 (BINOP
, vsub
,
14850 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14851 { VAR3 (BINOP
, vsubl
, v8qi
, v4hi
, v2si
) },
14852 { VAR3 (BINOP
, vsubw
, v8qi
, v4hi
, v2si
) },
14853 { VAR8 (BINOP
, vqsub
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14854 { VAR6 (BINOP
, vhsub
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14855 { VAR3 (BINOP
, vsubhn
, v8hi
, v4si
, v2di
) },
14856 { VAR8 (BINOP
, vceq
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14857 { VAR8 (BINOP
, vcge
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14858 { VAR8 (BINOP
, vcgt
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14859 { VAR2 (BINOP
, vcage
, v2sf
, v4sf
) },
14860 { VAR2 (BINOP
, vcagt
, v2sf
, v4sf
) },
14861 { VAR6 (BINOP
, vtst
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14862 { VAR8 (BINOP
, vabd
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14863 { VAR3 (BINOP
, vabdl
, v8qi
, v4hi
, v2si
) },
14864 { VAR6 (TERNOP
, vaba
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14865 { VAR3 (TERNOP
, vabal
, v8qi
, v4hi
, v2si
) },
14866 { VAR8 (BINOP
, vmax
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14867 { VAR8 (BINOP
, vmin
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14868 { VAR4 (BINOP
, vpadd
, v8qi
, v4hi
, v2si
, v2sf
) },
14869 { VAR6 (UNOP
, vpaddl
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14870 { VAR6 (BINOP
, vpadal
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14871 { VAR4 (BINOP
, vpmax
, v8qi
, v4hi
, v2si
, v2sf
) },
14872 { VAR4 (BINOP
, vpmin
, v8qi
, v4hi
, v2si
, v2sf
) },
14873 { VAR2 (BINOP
, vrecps
, v2sf
, v4sf
) },
14874 { VAR2 (BINOP
, vrsqrts
, v2sf
, v4sf
) },
14875 { VAR8 (SHIFTINSERT
, vsri_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14876 { VAR8 (SHIFTINSERT
, vsli_n
, v8qi
, v4hi
, v2si
, di
, v16qi
, v8hi
, v4si
, v2di
) },
14877 { VAR8 (UNOP
, vabs
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14878 { VAR6 (UNOP
, vqabs
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14879 { VAR8 (UNOP
, vneg
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14880 { VAR6 (UNOP
, vqneg
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14881 { VAR6 (UNOP
, vcls
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14882 { VAR6 (UNOP
, vclz
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14883 { VAR2 (UNOP
, vcnt
, v8qi
, v16qi
) },
14884 { VAR4 (UNOP
, vrecpe
, v2si
, v2sf
, v4si
, v4sf
) },
14885 { VAR4 (UNOP
, vrsqrte
, v2si
, v2sf
, v4si
, v4sf
) },
14886 { VAR6 (UNOP
, vmvn
, v8qi
, v4hi
, v2si
, v16qi
, v8hi
, v4si
) },
14887 /* FIXME: vget_lane supports more variants than this! */
14888 { VAR10 (GETLANE
, vget_lane
,
14889 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14890 { VAR10 (SETLANE
, vset_lane
,
14891 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14892 { VAR5 (CREATE
, vcreate
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14893 { VAR10 (DUP
, vdup_n
,
14894 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14895 { VAR10 (DUPLANE
, vdup_lane
,
14896 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14897 { VAR5 (COMBINE
, vcombine
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14898 { VAR5 (SPLIT
, vget_high
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14899 { VAR5 (SPLIT
, vget_low
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14900 { VAR3 (UNOP
, vmovn
, v8hi
, v4si
, v2di
) },
14901 { VAR3 (UNOP
, vqmovn
, v8hi
, v4si
, v2di
) },
14902 { VAR3 (UNOP
, vqmovun
, v8hi
, v4si
, v2di
) },
14903 { VAR3 (UNOP
, vmovl
, v8qi
, v4hi
, v2si
) },
14904 { VAR6 (LANEMUL
, vmul_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14905 { VAR6 (LANEMAC
, vmla_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14906 { VAR2 (LANEMAC
, vmlal_lane
, v4hi
, v2si
) },
14907 { VAR2 (LANEMAC
, vqdmlal_lane
, v4hi
, v2si
) },
14908 { VAR6 (LANEMAC
, vmls_lane
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14909 { VAR2 (LANEMAC
, vmlsl_lane
, v4hi
, v2si
) },
14910 { VAR2 (LANEMAC
, vqdmlsl_lane
, v4hi
, v2si
) },
14911 { VAR6 (SCALARMUL
, vmul_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14912 { VAR6 (SCALARMAC
, vmla_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14913 { VAR2 (SCALARMAC
, vmlal_n
, v4hi
, v2si
) },
14914 { VAR2 (SCALARMAC
, vqdmlal_n
, v4hi
, v2si
) },
14915 { VAR6 (SCALARMAC
, vmls_n
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14916 { VAR2 (SCALARMAC
, vmlsl_n
, v4hi
, v2si
) },
14917 { VAR2 (SCALARMAC
, vqdmlsl_n
, v4hi
, v2si
) },
14918 { VAR10 (BINOP
, vext
,
14919 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14920 { VAR8 (UNOP
, vrev64
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14921 { VAR4 (UNOP
, vrev32
, v8qi
, v4hi
, v16qi
, v8hi
) },
14922 { VAR2 (UNOP
, vrev16
, v8qi
, v16qi
) },
14923 { VAR4 (CONVERT
, vcvt
, v2si
, v2sf
, v4si
, v4sf
) },
14924 { VAR4 (FIXCONV
, vcvt_n
, v2si
, v2sf
, v4si
, v4sf
) },
14925 { VAR10 (SELECT
, vbsl
,
14926 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14927 { VAR1 (VTBL
, vtbl1
, v8qi
) },
14928 { VAR1 (VTBL
, vtbl2
, v8qi
) },
14929 { VAR1 (VTBL
, vtbl3
, v8qi
) },
14930 { VAR1 (VTBL
, vtbl4
, v8qi
) },
14931 { VAR1 (VTBX
, vtbx1
, v8qi
) },
14932 { VAR1 (VTBX
, vtbx2
, v8qi
) },
14933 { VAR1 (VTBX
, vtbx3
, v8qi
) },
14934 { VAR1 (VTBX
, vtbx4
, v8qi
) },
14935 { VAR8 (RESULTPAIR
, vtrn
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14936 { VAR8 (RESULTPAIR
, vzip
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14937 { VAR8 (RESULTPAIR
, vuzp
, v8qi
, v4hi
, v2si
, v2sf
, v16qi
, v8hi
, v4si
, v4sf
) },
14938 { VAR5 (REINTERP
, vreinterpretv8qi
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14939 { VAR5 (REINTERP
, vreinterpretv4hi
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14940 { VAR5 (REINTERP
, vreinterpretv2si
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14941 { VAR5 (REINTERP
, vreinterpretv2sf
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14942 { VAR5 (REINTERP
, vreinterpretdi
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14943 { VAR5 (REINTERP
, vreinterpretv16qi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14944 { VAR5 (REINTERP
, vreinterpretv8hi
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14945 { VAR5 (REINTERP
, vreinterpretv4si
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14946 { VAR5 (REINTERP
, vreinterpretv4sf
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14947 { VAR5 (REINTERP
, vreinterpretv2di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14948 { VAR10 (LOAD1
, vld1
,
14949 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14950 { VAR10 (LOAD1LANE
, vld1_lane
,
14951 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14952 { VAR10 (LOAD1
, vld1_dup
,
14953 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14954 { VAR10 (STORE1
, vst1
,
14955 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14956 { VAR10 (STORE1LANE
, vst1_lane
,
14957 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14958 { VAR9 (LOADSTRUCT
,
14959 vld2
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
14960 { VAR7 (LOADSTRUCTLANE
, vld2_lane
,
14961 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14962 { VAR5 (LOADSTRUCT
, vld2_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14963 { VAR9 (STORESTRUCT
, vst2
,
14964 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
14965 { VAR7 (STORESTRUCTLANE
, vst2_lane
,
14966 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14967 { VAR9 (LOADSTRUCT
,
14968 vld3
, v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
14969 { VAR7 (LOADSTRUCTLANE
, vld3_lane
,
14970 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14971 { VAR5 (LOADSTRUCT
, vld3_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14972 { VAR9 (STORESTRUCT
, vst3
,
14973 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
14974 { VAR7 (STORESTRUCTLANE
, vst3_lane
,
14975 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14976 { VAR9 (LOADSTRUCT
, vld4
,
14977 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
14978 { VAR7 (LOADSTRUCTLANE
, vld4_lane
,
14979 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14980 { VAR5 (LOADSTRUCT
, vld4_dup
, v8qi
, v4hi
, v2si
, v2sf
, di
) },
14981 { VAR9 (STORESTRUCT
, vst4
,
14982 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
) },
14983 { VAR7 (STORESTRUCTLANE
, vst4_lane
,
14984 v8qi
, v4hi
, v2si
, v2sf
, v8hi
, v4si
, v4sf
) },
14985 { VAR10 (LOGICBINOP
, vand
,
14986 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14987 { VAR10 (LOGICBINOP
, vorr
,
14988 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14989 { VAR10 (BINOP
, veor
,
14990 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14991 { VAR10 (LOGICBINOP
, vbic
,
14992 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) },
14993 { VAR10 (LOGICBINOP
, vorn
,
14994 v8qi
, v4hi
, v2si
, v2sf
, di
, v16qi
, v8hi
, v4si
, v4sf
, v2di
) }
15010 arm_init_neon_builtins (void)
15012 unsigned int i
, fcode
= ARM_BUILTIN_NEON_BASE
;
15014 tree neon_intQI_type_node
;
15015 tree neon_intHI_type_node
;
15016 tree neon_polyQI_type_node
;
15017 tree neon_polyHI_type_node
;
15018 tree neon_intSI_type_node
;
15019 tree neon_intDI_type_node
;
15020 tree neon_float_type_node
;
15022 tree intQI_pointer_node
;
15023 tree intHI_pointer_node
;
15024 tree intSI_pointer_node
;
15025 tree intDI_pointer_node
;
15026 tree float_pointer_node
;
15028 tree const_intQI_node
;
15029 tree const_intHI_node
;
15030 tree const_intSI_node
;
15031 tree const_intDI_node
;
15032 tree const_float_node
;
15034 tree const_intQI_pointer_node
;
15035 tree const_intHI_pointer_node
;
15036 tree const_intSI_pointer_node
;
15037 tree const_intDI_pointer_node
;
15038 tree const_float_pointer_node
;
15040 tree V8QI_type_node
;
15041 tree V4HI_type_node
;
15042 tree V2SI_type_node
;
15043 tree V2SF_type_node
;
15044 tree V16QI_type_node
;
15045 tree V8HI_type_node
;
15046 tree V4SI_type_node
;
15047 tree V4SF_type_node
;
15048 tree V2DI_type_node
;
15050 tree intUQI_type_node
;
15051 tree intUHI_type_node
;
15052 tree intUSI_type_node
;
15053 tree intUDI_type_node
;
15055 tree intEI_type_node
;
15056 tree intOI_type_node
;
15057 tree intCI_type_node
;
15058 tree intXI_type_node
;
15060 tree V8QI_pointer_node
;
15061 tree V4HI_pointer_node
;
15062 tree V2SI_pointer_node
;
15063 tree V2SF_pointer_node
;
15064 tree V16QI_pointer_node
;
15065 tree V8HI_pointer_node
;
15066 tree V4SI_pointer_node
;
15067 tree V4SF_pointer_node
;
15068 tree V2DI_pointer_node
;
15070 tree void_ftype_pv8qi_v8qi_v8qi
;
15071 tree void_ftype_pv4hi_v4hi_v4hi
;
15072 tree void_ftype_pv2si_v2si_v2si
;
15073 tree void_ftype_pv2sf_v2sf_v2sf
;
15074 tree void_ftype_pdi_di_di
;
15075 tree void_ftype_pv16qi_v16qi_v16qi
;
15076 tree void_ftype_pv8hi_v8hi_v8hi
;
15077 tree void_ftype_pv4si_v4si_v4si
;
15078 tree void_ftype_pv4sf_v4sf_v4sf
;
15079 tree void_ftype_pv2di_v2di_v2di
;
15081 tree reinterp_ftype_dreg
[5][5];
15082 tree reinterp_ftype_qreg
[5][5];
15083 tree dreg_types
[5], qreg_types
[5];
15085 /* Create distinguished type nodes for NEON vector element types,
15086 and pointers to values of such types, so we can detect them later. */
15087 neon_intQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
15088 neon_intHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
15089 neon_polyQI_type_node
= make_signed_type (GET_MODE_PRECISION (QImode
));
15090 neon_polyHI_type_node
= make_signed_type (GET_MODE_PRECISION (HImode
));
15091 neon_intSI_type_node
= make_signed_type (GET_MODE_PRECISION (SImode
));
15092 neon_intDI_type_node
= make_signed_type (GET_MODE_PRECISION (DImode
));
15093 neon_float_type_node
= make_node (REAL_TYPE
);
15094 TYPE_PRECISION (neon_float_type_node
) = FLOAT_TYPE_SIZE
;
15095 layout_type (neon_float_type_node
);
15097 /* Define typedefs which exactly correspond to the modes we are basing vector
15098 types on. If you change these names you'll need to change
15099 the table used by arm_mangle_type too. */
15100 (*lang_hooks
.types
.register_builtin_type
) (neon_intQI_type_node
,
15101 "__builtin_neon_qi");
15102 (*lang_hooks
.types
.register_builtin_type
) (neon_intHI_type_node
,
15103 "__builtin_neon_hi");
15104 (*lang_hooks
.types
.register_builtin_type
) (neon_intSI_type_node
,
15105 "__builtin_neon_si");
15106 (*lang_hooks
.types
.register_builtin_type
) (neon_float_type_node
,
15107 "__builtin_neon_sf");
15108 (*lang_hooks
.types
.register_builtin_type
) (neon_intDI_type_node
,
15109 "__builtin_neon_di");
15110 (*lang_hooks
.types
.register_builtin_type
) (neon_polyQI_type_node
,
15111 "__builtin_neon_poly8");
15112 (*lang_hooks
.types
.register_builtin_type
) (neon_polyHI_type_node
,
15113 "__builtin_neon_poly16");
15115 intQI_pointer_node
= build_pointer_type (neon_intQI_type_node
);
15116 intHI_pointer_node
= build_pointer_type (neon_intHI_type_node
);
15117 intSI_pointer_node
= build_pointer_type (neon_intSI_type_node
);
15118 intDI_pointer_node
= build_pointer_type (neon_intDI_type_node
);
15119 float_pointer_node
= build_pointer_type (neon_float_type_node
);
15121 /* Next create constant-qualified versions of the above types. */
15122 const_intQI_node
= build_qualified_type (neon_intQI_type_node
,
15124 const_intHI_node
= build_qualified_type (neon_intHI_type_node
,
15126 const_intSI_node
= build_qualified_type (neon_intSI_type_node
,
15128 const_intDI_node
= build_qualified_type (neon_intDI_type_node
,
15130 const_float_node
= build_qualified_type (neon_float_type_node
,
15133 const_intQI_pointer_node
= build_pointer_type (const_intQI_node
);
15134 const_intHI_pointer_node
= build_pointer_type (const_intHI_node
);
15135 const_intSI_pointer_node
= build_pointer_type (const_intSI_node
);
15136 const_intDI_pointer_node
= build_pointer_type (const_intDI_node
);
15137 const_float_pointer_node
= build_pointer_type (const_float_node
);
15139 /* Now create vector types based on our NEON element types. */
15140 /* 64-bit vectors. */
15142 build_vector_type_for_mode (neon_intQI_type_node
, V8QImode
);
15144 build_vector_type_for_mode (neon_intHI_type_node
, V4HImode
);
15146 build_vector_type_for_mode (neon_intSI_type_node
, V2SImode
);
15148 build_vector_type_for_mode (neon_float_type_node
, V2SFmode
);
15149 /* 128-bit vectors. */
15151 build_vector_type_for_mode (neon_intQI_type_node
, V16QImode
);
15153 build_vector_type_for_mode (neon_intHI_type_node
, V8HImode
);
15155 build_vector_type_for_mode (neon_intSI_type_node
, V4SImode
);
15157 build_vector_type_for_mode (neon_float_type_node
, V4SFmode
);
15159 build_vector_type_for_mode (neon_intDI_type_node
, V2DImode
);
15161 /* Unsigned integer types for various mode sizes. */
15162 intUQI_type_node
= make_unsigned_type (GET_MODE_PRECISION (QImode
));
15163 intUHI_type_node
= make_unsigned_type (GET_MODE_PRECISION (HImode
));
15164 intUSI_type_node
= make_unsigned_type (GET_MODE_PRECISION (SImode
));
15165 intUDI_type_node
= make_unsigned_type (GET_MODE_PRECISION (DImode
));
15167 (*lang_hooks
.types
.register_builtin_type
) (intUQI_type_node
,
15168 "__builtin_neon_uqi");
15169 (*lang_hooks
.types
.register_builtin_type
) (intUHI_type_node
,
15170 "__builtin_neon_uhi");
15171 (*lang_hooks
.types
.register_builtin_type
) (intUSI_type_node
,
15172 "__builtin_neon_usi");
15173 (*lang_hooks
.types
.register_builtin_type
) (intUDI_type_node
,
15174 "__builtin_neon_udi");
15176 /* Opaque integer types for structures of vectors. */
15177 intEI_type_node
= make_signed_type (GET_MODE_PRECISION (EImode
));
15178 intOI_type_node
= make_signed_type (GET_MODE_PRECISION (OImode
));
15179 intCI_type_node
= make_signed_type (GET_MODE_PRECISION (CImode
));
15180 intXI_type_node
= make_signed_type (GET_MODE_PRECISION (XImode
));
15182 (*lang_hooks
.types
.register_builtin_type
) (intTI_type_node
,
15183 "__builtin_neon_ti");
15184 (*lang_hooks
.types
.register_builtin_type
) (intEI_type_node
,
15185 "__builtin_neon_ei");
15186 (*lang_hooks
.types
.register_builtin_type
) (intOI_type_node
,
15187 "__builtin_neon_oi");
15188 (*lang_hooks
.types
.register_builtin_type
) (intCI_type_node
,
15189 "__builtin_neon_ci");
15190 (*lang_hooks
.types
.register_builtin_type
) (intXI_type_node
,
15191 "__builtin_neon_xi");
15193 /* Pointers to vector types. */
15194 V8QI_pointer_node
= build_pointer_type (V8QI_type_node
);
15195 V4HI_pointer_node
= build_pointer_type (V4HI_type_node
);
15196 V2SI_pointer_node
= build_pointer_type (V2SI_type_node
);
15197 V2SF_pointer_node
= build_pointer_type (V2SF_type_node
);
15198 V16QI_pointer_node
= build_pointer_type (V16QI_type_node
);
15199 V8HI_pointer_node
= build_pointer_type (V8HI_type_node
);
15200 V4SI_pointer_node
= build_pointer_type (V4SI_type_node
);
15201 V4SF_pointer_node
= build_pointer_type (V4SF_type_node
);
15202 V2DI_pointer_node
= build_pointer_type (V2DI_type_node
);
15204 /* Operations which return results as pairs. */
15205 void_ftype_pv8qi_v8qi_v8qi
=
15206 build_function_type_list (void_type_node
, V8QI_pointer_node
, V8QI_type_node
,
15207 V8QI_type_node
, NULL
);
15208 void_ftype_pv4hi_v4hi_v4hi
=
15209 build_function_type_list (void_type_node
, V4HI_pointer_node
, V4HI_type_node
,
15210 V4HI_type_node
, NULL
);
15211 void_ftype_pv2si_v2si_v2si
=
15212 build_function_type_list (void_type_node
, V2SI_pointer_node
, V2SI_type_node
,
15213 V2SI_type_node
, NULL
);
15214 void_ftype_pv2sf_v2sf_v2sf
=
15215 build_function_type_list (void_type_node
, V2SF_pointer_node
, V2SF_type_node
,
15216 V2SF_type_node
, NULL
);
15217 void_ftype_pdi_di_di
=
15218 build_function_type_list (void_type_node
, intDI_pointer_node
,
15219 neon_intDI_type_node
, neon_intDI_type_node
, NULL
);
15220 void_ftype_pv16qi_v16qi_v16qi
=
15221 build_function_type_list (void_type_node
, V16QI_pointer_node
,
15222 V16QI_type_node
, V16QI_type_node
, NULL
);
15223 void_ftype_pv8hi_v8hi_v8hi
=
15224 build_function_type_list (void_type_node
, V8HI_pointer_node
, V8HI_type_node
,
15225 V8HI_type_node
, NULL
);
15226 void_ftype_pv4si_v4si_v4si
=
15227 build_function_type_list (void_type_node
, V4SI_pointer_node
, V4SI_type_node
,
15228 V4SI_type_node
, NULL
);
15229 void_ftype_pv4sf_v4sf_v4sf
=
15230 build_function_type_list (void_type_node
, V4SF_pointer_node
, V4SF_type_node
,
15231 V4SF_type_node
, NULL
);
15232 void_ftype_pv2di_v2di_v2di
=
15233 build_function_type_list (void_type_node
, V2DI_pointer_node
, V2DI_type_node
,
15234 V2DI_type_node
, NULL
);
15236 dreg_types
[0] = V8QI_type_node
;
15237 dreg_types
[1] = V4HI_type_node
;
15238 dreg_types
[2] = V2SI_type_node
;
15239 dreg_types
[3] = V2SF_type_node
;
15240 dreg_types
[4] = neon_intDI_type_node
;
15242 qreg_types
[0] = V16QI_type_node
;
15243 qreg_types
[1] = V8HI_type_node
;
15244 qreg_types
[2] = V4SI_type_node
;
15245 qreg_types
[3] = V4SF_type_node
;
15246 qreg_types
[4] = V2DI_type_node
;
15248 for (i
= 0; i
< 5; i
++)
15251 for (j
= 0; j
< 5; j
++)
15253 reinterp_ftype_dreg
[i
][j
]
15254 = build_function_type_list (dreg_types
[i
], dreg_types
[j
], NULL
);
15255 reinterp_ftype_qreg
[i
][j
]
15256 = build_function_type_list (qreg_types
[i
], qreg_types
[j
], NULL
);
15260 for (i
= 0; i
< ARRAY_SIZE (neon_builtin_data
); i
++)
15262 neon_builtin_datum
*d
= &neon_builtin_data
[i
];
15263 unsigned int j
, codeidx
= 0;
15265 d
->base_fcode
= fcode
;
15267 for (j
= 0; j
< T_MAX
; j
++)
15269 const char* const modenames
[] = {
15270 "v8qi", "v4hi", "v2si", "v2sf", "di",
15271 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15275 enum insn_code icode
;
15276 int is_load
= 0, is_store
= 0;
15278 if ((d
->bits
& (1 << j
)) == 0)
15281 icode
= d
->codes
[codeidx
++];
15286 case NEON_LOAD1LANE
:
15287 case NEON_LOADSTRUCT
:
15288 case NEON_LOADSTRUCTLANE
:
15290 /* Fall through. */
15292 case NEON_STORE1LANE
:
15293 case NEON_STORESTRUCT
:
15294 case NEON_STORESTRUCTLANE
:
15297 /* Fall through. */
15300 case NEON_LOGICBINOP
:
15301 case NEON_SHIFTINSERT
:
15308 case NEON_SHIFTIMM
:
15309 case NEON_SHIFTACC
:
15315 case NEON_LANEMULL
:
15316 case NEON_LANEMULH
:
15318 case NEON_SCALARMUL
:
15319 case NEON_SCALARMULL
:
15320 case NEON_SCALARMULH
:
15321 case NEON_SCALARMAC
:
15327 tree return_type
= void_type_node
, args
= void_list_node
;
15329 /* Build a function type directly from the insn_data for this
15330 builtin. The build_function_type() function takes care of
15331 removing duplicates for us. */
15332 for (k
= insn_data
[icode
].n_operands
- 1; k
>= 0; k
--)
15336 if (is_load
&& k
== 1)
15338 /* Neon load patterns always have the memory operand
15339 (a SImode pointer) in the operand 1 position. We
15340 want a const pointer to the element type in that
15342 gcc_assert (insn_data
[icode
].operand
[k
].mode
== SImode
);
15348 eltype
= const_intQI_pointer_node
;
15353 eltype
= const_intHI_pointer_node
;
15358 eltype
= const_intSI_pointer_node
;
15363 eltype
= const_float_pointer_node
;
15368 eltype
= const_intDI_pointer_node
;
15371 default: gcc_unreachable ();
15374 else if (is_store
&& k
== 0)
15376 /* Similarly, Neon store patterns use operand 0 as
15377 the memory location to store to (a SImode pointer).
15378 Use a pointer to the element type of the store in
15380 gcc_assert (insn_data
[icode
].operand
[k
].mode
== SImode
);
15386 eltype
= intQI_pointer_node
;
15391 eltype
= intHI_pointer_node
;
15396 eltype
= intSI_pointer_node
;
15401 eltype
= float_pointer_node
;
15406 eltype
= intDI_pointer_node
;
15409 default: gcc_unreachable ();
15414 switch (insn_data
[icode
].operand
[k
].mode
)
15416 case VOIDmode
: eltype
= void_type_node
; break;
15418 case QImode
: eltype
= neon_intQI_type_node
; break;
15419 case HImode
: eltype
= neon_intHI_type_node
; break;
15420 case SImode
: eltype
= neon_intSI_type_node
; break;
15421 case SFmode
: eltype
= neon_float_type_node
; break;
15422 case DImode
: eltype
= neon_intDI_type_node
; break;
15423 case TImode
: eltype
= intTI_type_node
; break;
15424 case EImode
: eltype
= intEI_type_node
; break;
15425 case OImode
: eltype
= intOI_type_node
; break;
15426 case CImode
: eltype
= intCI_type_node
; break;
15427 case XImode
: eltype
= intXI_type_node
; break;
15428 /* 64-bit vectors. */
15429 case V8QImode
: eltype
= V8QI_type_node
; break;
15430 case V4HImode
: eltype
= V4HI_type_node
; break;
15431 case V2SImode
: eltype
= V2SI_type_node
; break;
15432 case V2SFmode
: eltype
= V2SF_type_node
; break;
15433 /* 128-bit vectors. */
15434 case V16QImode
: eltype
= V16QI_type_node
; break;
15435 case V8HImode
: eltype
= V8HI_type_node
; break;
15436 case V4SImode
: eltype
= V4SI_type_node
; break;
15437 case V4SFmode
: eltype
= V4SF_type_node
; break;
15438 case V2DImode
: eltype
= V2DI_type_node
; break;
15439 default: gcc_unreachable ();
15443 if (k
== 0 && !is_store
)
15444 return_type
= eltype
;
15446 args
= tree_cons (NULL_TREE
, eltype
, args
);
15449 ftype
= build_function_type (return_type
, args
);
15453 case NEON_RESULTPAIR
:
15455 switch (insn_data
[icode
].operand
[1].mode
)
15457 case V8QImode
: ftype
= void_ftype_pv8qi_v8qi_v8qi
; break;
15458 case V4HImode
: ftype
= void_ftype_pv4hi_v4hi_v4hi
; break;
15459 case V2SImode
: ftype
= void_ftype_pv2si_v2si_v2si
; break;
15460 case V2SFmode
: ftype
= void_ftype_pv2sf_v2sf_v2sf
; break;
15461 case DImode
: ftype
= void_ftype_pdi_di_di
; break;
15462 case V16QImode
: ftype
= void_ftype_pv16qi_v16qi_v16qi
; break;
15463 case V8HImode
: ftype
= void_ftype_pv8hi_v8hi_v8hi
; break;
15464 case V4SImode
: ftype
= void_ftype_pv4si_v4si_v4si
; break;
15465 case V4SFmode
: ftype
= void_ftype_pv4sf_v4sf_v4sf
; break;
15466 case V2DImode
: ftype
= void_ftype_pv2di_v2di_v2di
; break;
15467 default: gcc_unreachable ();
15472 case NEON_REINTERP
:
15474 /* We iterate over 5 doubleword types, then 5 quadword
15477 switch (insn_data
[icode
].operand
[0].mode
)
15479 case V8QImode
: ftype
= reinterp_ftype_dreg
[0][rhs
]; break;
15480 case V4HImode
: ftype
= reinterp_ftype_dreg
[1][rhs
]; break;
15481 case V2SImode
: ftype
= reinterp_ftype_dreg
[2][rhs
]; break;
15482 case V2SFmode
: ftype
= reinterp_ftype_dreg
[3][rhs
]; break;
15483 case DImode
: ftype
= reinterp_ftype_dreg
[4][rhs
]; break;
15484 case V16QImode
: ftype
= reinterp_ftype_qreg
[0][rhs
]; break;
15485 case V8HImode
: ftype
= reinterp_ftype_qreg
[1][rhs
]; break;
15486 case V4SImode
: ftype
= reinterp_ftype_qreg
[2][rhs
]; break;
15487 case V4SFmode
: ftype
= reinterp_ftype_qreg
[3][rhs
]; break;
15488 case V2DImode
: ftype
= reinterp_ftype_qreg
[4][rhs
]; break;
15489 default: gcc_unreachable ();
15495 gcc_unreachable ();
15498 gcc_assert (ftype
!= NULL
);
15500 sprintf (namebuf
, "__builtin_neon_%s%s", d
->name
, modenames
[j
]);
15502 add_builtin_function (namebuf
, ftype
, fcode
++, BUILT_IN_MD
, NULL
,
15509 arm_init_builtins (void)
15511 arm_init_tls_builtins ();
15513 if (TARGET_REALLY_IWMMXT
)
15514 arm_init_iwmmxt_builtins ();
15517 arm_init_neon_builtins ();
15520 /* Errors in the source file can cause expand_expr to return const0_rtx
15521 where we expect a vector. To avoid crashing, use one of the vector
15522 clear instructions. */
15525 safe_vector_operand (rtx x
, enum machine_mode mode
)
15527 if (x
!= const0_rtx
)
15529 x
= gen_reg_rtx (mode
);
15531 emit_insn (gen_iwmmxt_clrdi (mode
== DImode
? x
15532 : gen_rtx_SUBREG (DImode
, x
, 0)));
15536 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15539 arm_expand_binop_builtin (enum insn_code icode
,
15540 tree exp
, rtx target
)
15543 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15544 tree arg1
= CALL_EXPR_ARG (exp
, 1);
15545 rtx op0
= expand_normal (arg0
);
15546 rtx op1
= expand_normal (arg1
);
15547 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15548 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15549 enum machine_mode mode1
= insn_data
[icode
].operand
[2].mode
;
15551 if (VECTOR_MODE_P (mode0
))
15552 op0
= safe_vector_operand (op0
, mode0
);
15553 if (VECTOR_MODE_P (mode1
))
15554 op1
= safe_vector_operand (op1
, mode1
);
15557 || GET_MODE (target
) != tmode
15558 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15559 target
= gen_reg_rtx (tmode
);
15561 gcc_assert (GET_MODE (op0
) == mode0
&& GET_MODE (op1
) == mode1
);
15563 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15564 op0
= copy_to_mode_reg (mode0
, op0
);
15565 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
15566 op1
= copy_to_mode_reg (mode1
, op1
);
15568 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
15575 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15578 arm_expand_unop_builtin (enum insn_code icode
,
15579 tree exp
, rtx target
, int do_load
)
15582 tree arg0
= CALL_EXPR_ARG (exp
, 0);
15583 rtx op0
= expand_normal (arg0
);
15584 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15585 enum machine_mode mode0
= insn_data
[icode
].operand
[1].mode
;
15588 || GET_MODE (target
) != tmode
15589 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
15590 target
= gen_reg_rtx (tmode
);
15592 op0
= gen_rtx_MEM (mode0
, copy_to_mode_reg (Pmode
, op0
));
15595 if (VECTOR_MODE_P (mode0
))
15596 op0
= safe_vector_operand (op0
, mode0
);
15598 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
15599 op0
= copy_to_mode_reg (mode0
, op0
);
15602 pat
= GEN_FCN (icode
) (target
, op0
);
15610 neon_builtin_compare (const void *a
, const void *b
)
15612 const neon_builtin_datum
*const key
= (const neon_builtin_datum
*) a
;
15613 const neon_builtin_datum
*const memb
= (const neon_builtin_datum
*) b
;
15614 unsigned int soughtcode
= key
->base_fcode
;
15616 if (soughtcode
>= memb
->base_fcode
15617 && soughtcode
< memb
->base_fcode
+ memb
->num_vars
)
15619 else if (soughtcode
< memb
->base_fcode
)
15625 static enum insn_code
15626 locate_neon_builtin_icode (int fcode
, neon_itype
*itype
)
15628 neon_builtin_datum key
, *found
;
15631 key
.base_fcode
= fcode
;
15632 found
= (neon_builtin_datum
*)
15633 bsearch (&key
, &neon_builtin_data
[0], ARRAY_SIZE (neon_builtin_data
),
15634 sizeof (neon_builtin_data
[0]), neon_builtin_compare
);
15635 gcc_assert (found
);
15636 idx
= fcode
- (int) found
->base_fcode
;
15637 gcc_assert (idx
>= 0 && idx
< T_MAX
&& idx
< (int)found
->num_vars
);
15640 *itype
= found
->itype
;
15642 return found
->codes
[idx
];
15646 NEON_ARG_COPY_TO_REG
,
15651 #define NEON_MAX_BUILTIN_ARGS 5
15653 /* Expand a Neon builtin. */
15655 arm_expand_neon_args (rtx target
, int icode
, int have_retval
,
15660 tree arg
[NEON_MAX_BUILTIN_ARGS
];
15661 rtx op
[NEON_MAX_BUILTIN_ARGS
];
15662 enum machine_mode tmode
= insn_data
[icode
].operand
[0].mode
;
15663 enum machine_mode mode
[NEON_MAX_BUILTIN_ARGS
];
15668 || GET_MODE (target
) != tmode
15669 || !(*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
)))
15670 target
= gen_reg_rtx (tmode
);
15672 va_start (ap
, exp
);
15676 builtin_arg thisarg
= va_arg (ap
, int);
15678 if (thisarg
== NEON_ARG_STOP
)
15682 arg
[argc
] = CALL_EXPR_ARG (exp
, argc
);
15683 op
[argc
] = expand_normal (arg
[argc
]);
15684 mode
[argc
] = insn_data
[icode
].operand
[argc
+ have_retval
].mode
;
15688 case NEON_ARG_COPY_TO_REG
:
15689 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15690 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
15691 (op
[argc
], mode
[argc
]))
15692 op
[argc
] = copy_to_mode_reg (mode
[argc
], op
[argc
]);
15695 case NEON_ARG_CONSTANT
:
15696 /* FIXME: This error message is somewhat unhelpful. */
15697 if (!(*insn_data
[icode
].operand
[argc
+ have_retval
].predicate
)
15698 (op
[argc
], mode
[argc
]))
15699 error ("argument must be a constant");
15702 case NEON_ARG_STOP
:
15703 gcc_unreachable ();
15716 pat
= GEN_FCN (icode
) (target
, op
[0]);
15720 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1]);
15724 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2]);
15728 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3]);
15732 pat
= GEN_FCN (icode
) (target
, op
[0], op
[1], op
[2], op
[3], op
[4]);
15736 gcc_unreachable ();
15742 pat
= GEN_FCN (icode
) (op
[0]);
15746 pat
= GEN_FCN (icode
) (op
[0], op
[1]);
15750 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2]);
15754 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3]);
15758 pat
= GEN_FCN (icode
) (op
[0], op
[1], op
[2], op
[3], op
[4]);
15762 gcc_unreachable ();
15773 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15774 constants defined per-instruction or per instruction-variant. Instead, the
15775 required info is looked up in the table neon_builtin_data. */
15777 arm_expand_neon_builtin (int fcode
, tree exp
, rtx target
)
15780 enum insn_code icode
= locate_neon_builtin_icode (fcode
, &itype
);
15787 return arm_expand_neon_args (target
, icode
, 1, exp
,
15788 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
15792 case NEON_SCALARMUL
:
15793 case NEON_SCALARMULL
:
15794 case NEON_SCALARMULH
:
15795 case NEON_SHIFTINSERT
:
15796 case NEON_LOGICBINOP
:
15797 return arm_expand_neon_args (target
, icode
, 1, exp
,
15798 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
15802 return arm_expand_neon_args (target
, icode
, 1, exp
,
15803 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
15804 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
15808 case NEON_SHIFTIMM
:
15809 return arm_expand_neon_args (target
, icode
, 1, exp
,
15810 NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
,
15814 return arm_expand_neon_args (target
, icode
, 1, exp
,
15815 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
15819 case NEON_REINTERP
:
15820 return arm_expand_neon_args (target
, icode
, 1, exp
,
15821 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
15825 return arm_expand_neon_args (target
, icode
, 1, exp
,
15826 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
15828 case NEON_RESULTPAIR
:
15829 return arm_expand_neon_args (target
, icode
, 0, exp
,
15830 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
15834 case NEON_LANEMULL
:
15835 case NEON_LANEMULH
:
15836 return arm_expand_neon_args (target
, icode
, 1, exp
,
15837 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
15838 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
15841 return arm_expand_neon_args (target
, icode
, 1, exp
,
15842 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
15843 NEON_ARG_CONSTANT
, NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
15845 case NEON_SHIFTACC
:
15846 return arm_expand_neon_args (target
, icode
, 1, exp
,
15847 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
15848 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
15850 case NEON_SCALARMAC
:
15851 return arm_expand_neon_args (target
, icode
, 1, exp
,
15852 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
15853 NEON_ARG_CONSTANT
, NEON_ARG_STOP
);
15857 return arm_expand_neon_args (target
, icode
, 1, exp
,
15858 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
,
15862 case NEON_LOADSTRUCT
:
15863 return arm_expand_neon_args (target
, icode
, 1, exp
,
15864 NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
15866 case NEON_LOAD1LANE
:
15867 case NEON_LOADSTRUCTLANE
:
15868 return arm_expand_neon_args (target
, icode
, 1, exp
,
15869 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
15873 case NEON_STORESTRUCT
:
15874 return arm_expand_neon_args (target
, icode
, 0, exp
,
15875 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_STOP
);
15877 case NEON_STORE1LANE
:
15878 case NEON_STORESTRUCTLANE
:
15879 return arm_expand_neon_args (target
, icode
, 0, exp
,
15880 NEON_ARG_COPY_TO_REG
, NEON_ARG_COPY_TO_REG
, NEON_ARG_CONSTANT
,
15884 gcc_unreachable ();
15887 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15889 neon_reinterpret (rtx dest
, rtx src
)
15891 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
15894 /* Emit code to place a Neon pair result in memory locations (with equal
15897 neon_emit_pair_result_insn (enum machine_mode mode
,
15898 rtx (*intfn
) (rtx
, rtx
, rtx
, rtx
), rtx destaddr
,
15901 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
15902 rtx tmp1
= gen_reg_rtx (mode
);
15903 rtx tmp2
= gen_reg_rtx (mode
);
15905 emit_insn (intfn (tmp1
, op1
, tmp2
, op2
));
15907 emit_move_insn (mem
, tmp1
);
15908 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
15909 emit_move_insn (mem
, tmp2
);
15912 /* Set up operands for a register copy from src to dest, taking care not to
15913 clobber registers in the process.
15914 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15915 be called with a large N, so that should be OK. */
15918 neon_disambiguate_copy (rtx
*operands
, rtx
*dest
, rtx
*src
, unsigned int count
)
15920 unsigned int copied
= 0, opctr
= 0;
15921 unsigned int done
= (1 << count
) - 1;
15924 while (copied
!= done
)
15926 for (i
= 0; i
< count
; i
++)
15930 for (j
= 0; good
&& j
< count
; j
++)
15931 if (i
!= j
&& (copied
& (1 << j
)) == 0
15932 && reg_overlap_mentioned_p (src
[j
], dest
[i
]))
15937 operands
[opctr
++] = dest
[i
];
15938 operands
[opctr
++] = src
[i
];
15944 gcc_assert (opctr
== count
* 2);
15947 /* Expand an expression EXP that calls a built-in function,
15948 with result going to TARGET if that's convenient
15949 (and in mode MODE if that's convenient).
15950 SUBTARGET may be used as the target for computing one of EXP's operands.
15951 IGNORE is nonzero if the value is to be ignored. */
15954 arm_expand_builtin (tree exp
,
15956 rtx subtarget ATTRIBUTE_UNUSED
,
15957 enum machine_mode mode ATTRIBUTE_UNUSED
,
15958 int ignore ATTRIBUTE_UNUSED
)
15960 const struct builtin_description
* d
;
15961 enum insn_code icode
;
15962 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
15970 int fcode
= DECL_FUNCTION_CODE (fndecl
);
15972 enum machine_mode tmode
;
15973 enum machine_mode mode0
;
15974 enum machine_mode mode1
;
15975 enum machine_mode mode2
;
15977 if (fcode
>= ARM_BUILTIN_NEON_BASE
)
15978 return arm_expand_neon_builtin (fcode
, exp
, target
);
15982 case ARM_BUILTIN_TEXTRMSB
:
15983 case ARM_BUILTIN_TEXTRMUB
:
15984 case ARM_BUILTIN_TEXTRMSH
:
15985 case ARM_BUILTIN_TEXTRMUH
:
15986 case ARM_BUILTIN_TEXTRMSW
:
15987 case ARM_BUILTIN_TEXTRMUW
:
15988 icode
= (fcode
== ARM_BUILTIN_TEXTRMSB
? CODE_FOR_iwmmxt_textrmsb
15989 : fcode
== ARM_BUILTIN_TEXTRMUB
? CODE_FOR_iwmmxt_textrmub
15990 : fcode
== ARM_BUILTIN_TEXTRMSH
? CODE_FOR_iwmmxt_textrmsh
15991 : fcode
== ARM_BUILTIN_TEXTRMUH
? CODE_FOR_iwmmxt_textrmuh
15992 : CODE_FOR_iwmmxt_textrmw
);
15994 arg0
= CALL_EXPR_ARG (exp
, 0);
15995 arg1
= CALL_EXPR_ARG (exp
, 1);
15996 op0
= expand_normal (arg0
);
15997 op1
= expand_normal (arg1
);
15998 tmode
= insn_data
[icode
].operand
[0].mode
;
15999 mode0
= insn_data
[icode
].operand
[1].mode
;
16000 mode1
= insn_data
[icode
].operand
[2].mode
;
16002 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16003 op0
= copy_to_mode_reg (mode0
, op0
);
16004 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16006 /* @@@ better error message */
16007 error ("selector must be an immediate");
16008 return gen_reg_rtx (tmode
);
16011 || GET_MODE (target
) != tmode
16012 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16013 target
= gen_reg_rtx (tmode
);
16014 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16020 case ARM_BUILTIN_TINSRB
:
16021 case ARM_BUILTIN_TINSRH
:
16022 case ARM_BUILTIN_TINSRW
:
16023 icode
= (fcode
== ARM_BUILTIN_TINSRB
? CODE_FOR_iwmmxt_tinsrb
16024 : fcode
== ARM_BUILTIN_TINSRH
? CODE_FOR_iwmmxt_tinsrh
16025 : CODE_FOR_iwmmxt_tinsrw
);
16026 arg0
= CALL_EXPR_ARG (exp
, 0);
16027 arg1
= CALL_EXPR_ARG (exp
, 1);
16028 arg2
= CALL_EXPR_ARG (exp
, 2);
16029 op0
= expand_normal (arg0
);
16030 op1
= expand_normal (arg1
);
16031 op2
= expand_normal (arg2
);
16032 tmode
= insn_data
[icode
].operand
[0].mode
;
16033 mode0
= insn_data
[icode
].operand
[1].mode
;
16034 mode1
= insn_data
[icode
].operand
[2].mode
;
16035 mode2
= insn_data
[icode
].operand
[3].mode
;
16037 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16038 op0
= copy_to_mode_reg (mode0
, op0
);
16039 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16040 op1
= copy_to_mode_reg (mode1
, op1
);
16041 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
16043 /* @@@ better error message */
16044 error ("selector must be an immediate");
16048 || GET_MODE (target
) != tmode
16049 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16050 target
= gen_reg_rtx (tmode
);
16051 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
16057 case ARM_BUILTIN_SETWCX
:
16058 arg0
= CALL_EXPR_ARG (exp
, 0);
16059 arg1
= CALL_EXPR_ARG (exp
, 1);
16060 op0
= force_reg (SImode
, expand_normal (arg0
));
16061 op1
= expand_normal (arg1
);
16062 emit_insn (gen_iwmmxt_tmcr (op1
, op0
));
16065 case ARM_BUILTIN_GETWCX
:
16066 arg0
= CALL_EXPR_ARG (exp
, 0);
16067 op0
= expand_normal (arg0
);
16068 target
= gen_reg_rtx (SImode
);
16069 emit_insn (gen_iwmmxt_tmrc (target
, op0
));
16072 case ARM_BUILTIN_WSHUFH
:
16073 icode
= CODE_FOR_iwmmxt_wshufh
;
16074 arg0
= CALL_EXPR_ARG (exp
, 0);
16075 arg1
= CALL_EXPR_ARG (exp
, 1);
16076 op0
= expand_normal (arg0
);
16077 op1
= expand_normal (arg1
);
16078 tmode
= insn_data
[icode
].operand
[0].mode
;
16079 mode1
= insn_data
[icode
].operand
[1].mode
;
16080 mode2
= insn_data
[icode
].operand
[2].mode
;
16082 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode1
))
16083 op0
= copy_to_mode_reg (mode1
, op0
);
16084 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode2
))
16086 /* @@@ better error message */
16087 error ("mask must be an immediate");
16091 || GET_MODE (target
) != tmode
16092 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16093 target
= gen_reg_rtx (tmode
);
16094 pat
= GEN_FCN (icode
) (target
, op0
, op1
);
16100 case ARM_BUILTIN_WSADB
:
16101 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb
, exp
, target
);
16102 case ARM_BUILTIN_WSADH
:
16103 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh
, exp
, target
);
16104 case ARM_BUILTIN_WSADBZ
:
16105 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz
, exp
, target
);
16106 case ARM_BUILTIN_WSADHZ
:
16107 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz
, exp
, target
);
16109 /* Several three-argument builtins. */
16110 case ARM_BUILTIN_WMACS
:
16111 case ARM_BUILTIN_WMACU
:
16112 case ARM_BUILTIN_WALIGN
:
16113 case ARM_BUILTIN_TMIA
:
16114 case ARM_BUILTIN_TMIAPH
:
16115 case ARM_BUILTIN_TMIATT
:
16116 case ARM_BUILTIN_TMIATB
:
16117 case ARM_BUILTIN_TMIABT
:
16118 case ARM_BUILTIN_TMIABB
:
16119 icode
= (fcode
== ARM_BUILTIN_WMACS
? CODE_FOR_iwmmxt_wmacs
16120 : fcode
== ARM_BUILTIN_WMACU
? CODE_FOR_iwmmxt_wmacu
16121 : fcode
== ARM_BUILTIN_TMIA
? CODE_FOR_iwmmxt_tmia
16122 : fcode
== ARM_BUILTIN_TMIAPH
? CODE_FOR_iwmmxt_tmiaph
16123 : fcode
== ARM_BUILTIN_TMIABB
? CODE_FOR_iwmmxt_tmiabb
16124 : fcode
== ARM_BUILTIN_TMIABT
? CODE_FOR_iwmmxt_tmiabt
16125 : fcode
== ARM_BUILTIN_TMIATB
? CODE_FOR_iwmmxt_tmiatb
16126 : fcode
== ARM_BUILTIN_TMIATT
? CODE_FOR_iwmmxt_tmiatt
16127 : CODE_FOR_iwmmxt_walign
);
16128 arg0
= CALL_EXPR_ARG (exp
, 0);
16129 arg1
= CALL_EXPR_ARG (exp
, 1);
16130 arg2
= CALL_EXPR_ARG (exp
, 2);
16131 op0
= expand_normal (arg0
);
16132 op1
= expand_normal (arg1
);
16133 op2
= expand_normal (arg2
);
16134 tmode
= insn_data
[icode
].operand
[0].mode
;
16135 mode0
= insn_data
[icode
].operand
[1].mode
;
16136 mode1
= insn_data
[icode
].operand
[2].mode
;
16137 mode2
= insn_data
[icode
].operand
[3].mode
;
16139 if (! (*insn_data
[icode
].operand
[1].predicate
) (op0
, mode0
))
16140 op0
= copy_to_mode_reg (mode0
, op0
);
16141 if (! (*insn_data
[icode
].operand
[2].predicate
) (op1
, mode1
))
16142 op1
= copy_to_mode_reg (mode1
, op1
);
16143 if (! (*insn_data
[icode
].operand
[3].predicate
) (op2
, mode2
))
16144 op2
= copy_to_mode_reg (mode2
, op2
);
16146 || GET_MODE (target
) != tmode
16147 || ! (*insn_data
[icode
].operand
[0].predicate
) (target
, tmode
))
16148 target
= gen_reg_rtx (tmode
);
16149 pat
= GEN_FCN (icode
) (target
, op0
, op1
, op2
);
16155 case ARM_BUILTIN_WZERO
:
16156 target
= gen_reg_rtx (DImode
);
16157 emit_insn (gen_iwmmxt_clrdi (target
));
16160 case ARM_BUILTIN_THREAD_POINTER
:
16161 return arm_load_tp (target
);
16167 for (i
= 0, d
= bdesc_2arg
; i
< ARRAY_SIZE (bdesc_2arg
); i
++, d
++)
16168 if (d
->code
== (const enum arm_builtins
) fcode
)
16169 return arm_expand_binop_builtin (d
->icode
, exp
, target
);
16171 for (i
= 0, d
= bdesc_1arg
; i
< ARRAY_SIZE (bdesc_1arg
); i
++, d
++)
16172 if (d
->code
== (const enum arm_builtins
) fcode
)
16173 return arm_expand_unop_builtin (d
->icode
, exp
, target
, 0);
16175 /* @@@ Should really do something sensible here. */
16179 /* Return the number (counting from 0) of
16180 the least significant set bit in MASK. */
16183 number_of_first_bit_set (unsigned mask
)
16188 (mask
& (1 << bit
)) == 0;
16195 /* Emit code to push or pop registers to or from the stack. F is the
16196 assembly file. MASK is the registers to push or pop. PUSH is
16197 nonzero if we should push, and zero if we should pop. For debugging
16198 output, if pushing, adjust CFA_OFFSET by the amount of space added
16199 to the stack. REAL_REGS should have the same number of bits set as
16200 MASK, and will be used instead (in the same order) to describe which
16201 registers were saved - this is used to mark the save slots when we
16202 push high registers after moving them to low registers. */
16204 thumb_pushpop (FILE *f
, unsigned long mask
, int push
, int *cfa_offset
,
16205 unsigned long real_regs
)
16208 int lo_mask
= mask
& 0xFF;
16209 int pushed_words
= 0;
16213 if (lo_mask
== 0 && !push
&& (mask
& (1 << PC_REGNUM
)))
16215 /* Special case. Do not generate a POP PC statement here, do it in
16217 thumb_exit (f
, -1);
16221 if (ARM_EABI_UNWIND_TABLES
&& push
)
16223 fprintf (f
, "\t.save\t{");
16224 for (regno
= 0; regno
< 15; regno
++)
16226 if (real_regs
& (1 << regno
))
16228 if (real_regs
& ((1 << regno
) -1))
16230 asm_fprintf (f
, "%r", regno
);
16233 fprintf (f
, "}\n");
16236 fprintf (f
, "\t%s\t{", push
? "push" : "pop");
16238 /* Look at the low registers first. */
16239 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++, lo_mask
>>= 1)
16243 asm_fprintf (f
, "%r", regno
);
16245 if ((lo_mask
& ~1) != 0)
16252 if (push
&& (mask
& (1 << LR_REGNUM
)))
16254 /* Catch pushing the LR. */
16258 asm_fprintf (f
, "%r", LR_REGNUM
);
16262 else if (!push
&& (mask
& (1 << PC_REGNUM
)))
16264 /* Catch popping the PC. */
16265 if (TARGET_INTERWORK
|| TARGET_BACKTRACE
16266 || crtl
->calls_eh_return
)
16268 /* The PC is never poped directly, instead
16269 it is popped into r3 and then BX is used. */
16270 fprintf (f
, "}\n");
16272 thumb_exit (f
, -1);
16281 asm_fprintf (f
, "%r", PC_REGNUM
);
16285 fprintf (f
, "}\n");
16287 if (push
&& pushed_words
&& dwarf2out_do_frame ())
16289 char *l
= dwarf2out_cfi_label ();
16290 int pushed_mask
= real_regs
;
16292 *cfa_offset
+= pushed_words
* 4;
16293 dwarf2out_def_cfa (l
, SP_REGNUM
, *cfa_offset
);
16296 pushed_mask
= real_regs
;
16297 for (regno
= 0; regno
<= 14; regno
++, pushed_mask
>>= 1)
16299 if (pushed_mask
& 1)
16300 dwarf2out_reg_save (l
, regno
, 4 * pushed_words
++ - *cfa_offset
);
16305 /* Generate code to return from a thumb function.
16306 If 'reg_containing_return_addr' is -1, then the return address is
16307 actually on the stack, at the stack pointer. */
16309 thumb_exit (FILE *f
, int reg_containing_return_addr
)
16311 unsigned regs_available_for_popping
;
16312 unsigned regs_to_pop
;
16314 unsigned available
;
16318 int restore_a4
= FALSE
;
16320 /* Compute the registers we need to pop. */
16324 if (reg_containing_return_addr
== -1)
16326 regs_to_pop
|= 1 << LR_REGNUM
;
16330 if (TARGET_BACKTRACE
)
16332 /* Restore the (ARM) frame pointer and stack pointer. */
16333 regs_to_pop
|= (1 << ARM_HARD_FRAME_POINTER_REGNUM
) | (1 << SP_REGNUM
);
16337 /* If there is nothing to pop then just emit the BX instruction and
16339 if (pops_needed
== 0)
16341 if (crtl
->calls_eh_return
)
16342 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
16344 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
16347 /* Otherwise if we are not supporting interworking and we have not created
16348 a backtrace structure and the function was not entered in ARM mode then
16349 just pop the return address straight into the PC. */
16350 else if (!TARGET_INTERWORK
16351 && !TARGET_BACKTRACE
16352 && !is_called_in_ARM_mode (current_function_decl
)
16353 && !crtl
->calls_eh_return
)
16355 asm_fprintf (f
, "\tpop\t{%r}\n", PC_REGNUM
);
16359 /* Find out how many of the (return) argument registers we can corrupt. */
16360 regs_available_for_popping
= 0;
16362 /* If returning via __builtin_eh_return, the bottom three registers
16363 all contain information needed for the return. */
16364 if (crtl
->calls_eh_return
)
16368 /* If we can deduce the registers used from the function's
16369 return value. This is more reliable that examining
16370 df_regs_ever_live_p () because that will be set if the register is
16371 ever used in the function, not just if the register is used
16372 to hold a return value. */
16374 if (crtl
->return_rtx
!= 0)
16375 mode
= GET_MODE (crtl
->return_rtx
);
16377 mode
= DECL_MODE (DECL_RESULT (current_function_decl
));
16379 size
= GET_MODE_SIZE (mode
);
16383 /* In a void function we can use any argument register.
16384 In a function that returns a structure on the stack
16385 we can use the second and third argument registers. */
16386 if (mode
== VOIDmode
)
16387 regs_available_for_popping
=
16388 (1 << ARG_REGISTER (1))
16389 | (1 << ARG_REGISTER (2))
16390 | (1 << ARG_REGISTER (3));
16392 regs_available_for_popping
=
16393 (1 << ARG_REGISTER (2))
16394 | (1 << ARG_REGISTER (3));
16396 else if (size
<= 4)
16397 regs_available_for_popping
=
16398 (1 << ARG_REGISTER (2))
16399 | (1 << ARG_REGISTER (3));
16400 else if (size
<= 8)
16401 regs_available_for_popping
=
16402 (1 << ARG_REGISTER (3));
16405 /* Match registers to be popped with registers into which we pop them. */
16406 for (available
= regs_available_for_popping
,
16407 required
= regs_to_pop
;
16408 required
!= 0 && available
!= 0;
16409 available
&= ~(available
& - available
),
16410 required
&= ~(required
& - required
))
16413 /* If we have any popping registers left over, remove them. */
16415 regs_available_for_popping
&= ~available
;
16417 /* Otherwise if we need another popping register we can use
16418 the fourth argument register. */
16419 else if (pops_needed
)
16421 /* If we have not found any free argument registers and
16422 reg a4 contains the return address, we must move it. */
16423 if (regs_available_for_popping
== 0
16424 && reg_containing_return_addr
== LAST_ARG_REGNUM
)
16426 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
16427 reg_containing_return_addr
= LR_REGNUM
;
16429 else if (size
> 12)
16431 /* Register a4 is being used to hold part of the return value,
16432 but we have dire need of a free, low register. */
16435 asm_fprintf (f
, "\tmov\t%r, %r\n",IP_REGNUM
, LAST_ARG_REGNUM
);
16438 if (reg_containing_return_addr
!= LAST_ARG_REGNUM
)
16440 /* The fourth argument register is available. */
16441 regs_available_for_popping
|= 1 << LAST_ARG_REGNUM
;
16447 /* Pop as many registers as we can. */
16448 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
16449 regs_available_for_popping
);
16451 /* Process the registers we popped. */
16452 if (reg_containing_return_addr
== -1)
16454 /* The return address was popped into the lowest numbered register. */
16455 regs_to_pop
&= ~(1 << LR_REGNUM
);
16457 reg_containing_return_addr
=
16458 number_of_first_bit_set (regs_available_for_popping
);
16460 /* Remove this register for the mask of available registers, so that
16461 the return address will not be corrupted by further pops. */
16462 regs_available_for_popping
&= ~(1 << reg_containing_return_addr
);
16465 /* If we popped other registers then handle them here. */
16466 if (regs_available_for_popping
)
16470 /* Work out which register currently contains the frame pointer. */
16471 frame_pointer
= number_of_first_bit_set (regs_available_for_popping
);
16473 /* Move it into the correct place. */
16474 asm_fprintf (f
, "\tmov\t%r, %r\n",
16475 ARM_HARD_FRAME_POINTER_REGNUM
, frame_pointer
);
16477 /* (Temporarily) remove it from the mask of popped registers. */
16478 regs_available_for_popping
&= ~(1 << frame_pointer
);
16479 regs_to_pop
&= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM
);
16481 if (regs_available_for_popping
)
16485 /* We popped the stack pointer as well,
16486 find the register that contains it. */
16487 stack_pointer
= number_of_first_bit_set (regs_available_for_popping
);
16489 /* Move it into the stack register. */
16490 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, stack_pointer
);
16492 /* At this point we have popped all necessary registers, so
16493 do not worry about restoring regs_available_for_popping
16494 to its correct value:
16496 assert (pops_needed == 0)
16497 assert (regs_available_for_popping == (1 << frame_pointer))
16498 assert (regs_to_pop == (1 << STACK_POINTER)) */
16502 /* Since we have just move the popped value into the frame
16503 pointer, the popping register is available for reuse, and
16504 we know that we still have the stack pointer left to pop. */
16505 regs_available_for_popping
|= (1 << frame_pointer
);
16509 /* If we still have registers left on the stack, but we no longer have
16510 any registers into which we can pop them, then we must move the return
16511 address into the link register and make available the register that
16513 if (regs_available_for_popping
== 0 && pops_needed
> 0)
16515 regs_available_for_popping
|= 1 << reg_containing_return_addr
;
16517 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
,
16518 reg_containing_return_addr
);
16520 reg_containing_return_addr
= LR_REGNUM
;
16523 /* If we have registers left on the stack then pop some more.
16524 We know that at most we will want to pop FP and SP. */
16525 if (pops_needed
> 0)
16530 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
16531 regs_available_for_popping
);
16533 /* We have popped either FP or SP.
16534 Move whichever one it is into the correct register. */
16535 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
16536 move_to
= number_of_first_bit_set (regs_to_pop
);
16538 asm_fprintf (f
, "\tmov\t%r, %r\n", move_to
, popped_into
);
16540 regs_to_pop
&= ~(1 << move_to
);
16545 /* If we still have not popped everything then we must have only
16546 had one register available to us and we are now popping the SP. */
16547 if (pops_needed
> 0)
16551 thumb_pushpop (f
, regs_available_for_popping
, FALSE
, NULL
,
16552 regs_available_for_popping
);
16554 popped_into
= number_of_first_bit_set (regs_available_for_popping
);
16556 asm_fprintf (f
, "\tmov\t%r, %r\n", SP_REGNUM
, popped_into
);
16558 assert (regs_to_pop == (1 << STACK_POINTER))
16559 assert (pops_needed == 1)
16563 /* If necessary restore the a4 register. */
16566 if (reg_containing_return_addr
!= LR_REGNUM
)
16568 asm_fprintf (f
, "\tmov\t%r, %r\n", LR_REGNUM
, LAST_ARG_REGNUM
);
16569 reg_containing_return_addr
= LR_REGNUM
;
16572 asm_fprintf (f
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
, IP_REGNUM
);
16575 if (crtl
->calls_eh_return
)
16576 asm_fprintf (f
, "\tadd\t%r, %r\n", SP_REGNUM
, ARM_EH_STACKADJ_REGNUM
);
16578 /* Return to caller. */
16579 asm_fprintf (f
, "\tbx\t%r\n", reg_containing_return_addr
);
16584 thumb1_final_prescan_insn (rtx insn
)
16586 if (flag_print_asm_name
)
16587 asm_fprintf (asm_out_file
, "%@ 0x%04x\n",
16588 INSN_ADDRESSES (INSN_UID (insn
)));
16592 thumb_shiftable_const (unsigned HOST_WIDE_INT val
)
16594 unsigned HOST_WIDE_INT mask
= 0xff;
16597 if (val
== 0) /* XXX */
16600 for (i
= 0; i
< 25; i
++)
16601 if ((val
& (mask
<< i
)) == val
)
16607 /* Returns nonzero if the current function contains,
16608 or might contain a far jump. */
16610 thumb_far_jump_used_p (void)
16614 /* This test is only important for leaf functions. */
16615 /* assert (!leaf_function_p ()); */
16617 /* If we have already decided that far jumps may be used,
16618 do not bother checking again, and always return true even if
16619 it turns out that they are not being used. Once we have made
16620 the decision that far jumps are present (and that hence the link
16621 register will be pushed onto the stack) we cannot go back on it. */
16622 if (cfun
->machine
->far_jump_used
)
16625 /* If this function is not being called from the prologue/epilogue
16626 generation code then it must be being called from the
16627 INITIAL_ELIMINATION_OFFSET macro. */
16628 if (!(ARM_DOUBLEWORD_ALIGN
|| reload_completed
))
16630 /* In this case we know that we are being asked about the elimination
16631 of the arg pointer register. If that register is not being used,
16632 then there are no arguments on the stack, and we do not have to
16633 worry that a far jump might force the prologue to push the link
16634 register, changing the stack offsets. In this case we can just
16635 return false, since the presence of far jumps in the function will
16636 not affect stack offsets.
16638 If the arg pointer is live (or if it was live, but has now been
16639 eliminated and so set to dead) then we do have to test to see if
16640 the function might contain a far jump. This test can lead to some
16641 false negatives, since before reload is completed, then length of
16642 branch instructions is not known, so gcc defaults to returning their
16643 longest length, which in turn sets the far jump attribute to true.
16645 A false negative will not result in bad code being generated, but it
16646 will result in a needless push and pop of the link register. We
16647 hope that this does not occur too often.
16649 If we need doubleword stack alignment this could affect the other
16650 elimination offsets so we can't risk getting it wrong. */
16651 if (df_regs_ever_live_p (ARG_POINTER_REGNUM
))
16652 cfun
->machine
->arg_pointer_live
= 1;
16653 else if (!cfun
->machine
->arg_pointer_live
)
16657 /* Check to see if the function contains a branch
16658 insn with the far jump attribute set. */
16659 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
16661 if (GET_CODE (insn
) == JUMP_INSN
16662 /* Ignore tablejump patterns. */
16663 && GET_CODE (PATTERN (insn
)) != ADDR_VEC
16664 && GET_CODE (PATTERN (insn
)) != ADDR_DIFF_VEC
16665 && get_attr_far_jump (insn
) == FAR_JUMP_YES
16668 /* Record the fact that we have decided that
16669 the function does use far jumps. */
16670 cfun
->machine
->far_jump_used
= 1;
16678 /* Return nonzero if FUNC must be entered in ARM mode. */
16680 is_called_in_ARM_mode (tree func
)
16682 gcc_assert (TREE_CODE (func
) == FUNCTION_DECL
);
16684 /* Ignore the problem about functions whose address is taken. */
16685 if (TARGET_CALLEE_INTERWORKING
&& TREE_PUBLIC (func
))
16689 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func
)) != NULL_TREE
;
16695 /* The bits which aren't usefully expanded as rtl. */
16697 thumb_unexpanded_epilogue (void)
16699 arm_stack_offsets
*offsets
;
16701 unsigned long live_regs_mask
= 0;
16702 int high_regs_pushed
= 0;
16703 int had_to_push_lr
;
16706 if (return_used_this_function
)
16709 if (IS_NAKED (arm_current_func_type ()))
16712 offsets
= arm_get_frame_offsets ();
16713 live_regs_mask
= offsets
->saved_regs_mask
;
16714 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
16716 /* If we can deduce the registers used from the function's return value.
16717 This is more reliable that examining df_regs_ever_live_p () because that
16718 will be set if the register is ever used in the function, not just if
16719 the register is used to hold a return value. */
16720 size
= arm_size_return_regs ();
16722 /* The prolog may have pushed some high registers to use as
16723 work registers. e.g. the testsuite file:
16724 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16725 compiles to produce:
16726 push {r4, r5, r6, r7, lr}
16730 as part of the prolog. We have to undo that pushing here. */
16732 if (high_regs_pushed
)
16734 unsigned long mask
= live_regs_mask
& 0xff;
16737 /* The available low registers depend on the size of the value we are
16745 /* Oh dear! We have no low registers into which we can pop
16748 ("no low registers available for popping high registers");
16750 for (next_hi_reg
= 8; next_hi_reg
< 13; next_hi_reg
++)
16751 if (live_regs_mask
& (1 << next_hi_reg
))
16754 while (high_regs_pushed
)
16756 /* Find lo register(s) into which the high register(s) can
16758 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
16760 if (mask
& (1 << regno
))
16761 high_regs_pushed
--;
16762 if (high_regs_pushed
== 0)
16766 mask
&= (2 << regno
) - 1; /* A noop if regno == 8 */
16768 /* Pop the values into the low register(s). */
16769 thumb_pushpop (asm_out_file
, mask
, 0, NULL
, mask
);
16771 /* Move the value(s) into the high registers. */
16772 for (regno
= 0; regno
<= LAST_LO_REGNUM
; regno
++)
16774 if (mask
& (1 << regno
))
16776 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", next_hi_reg
,
16779 for (next_hi_reg
++; next_hi_reg
< 13; next_hi_reg
++)
16780 if (live_regs_mask
& (1 << next_hi_reg
))
16785 live_regs_mask
&= ~0x0f00;
16788 had_to_push_lr
= (live_regs_mask
& (1 << LR_REGNUM
)) != 0;
16789 live_regs_mask
&= 0xff;
16791 if (crtl
->args
.pretend_args_size
== 0 || TARGET_BACKTRACE
)
16793 /* Pop the return address into the PC. */
16794 if (had_to_push_lr
)
16795 live_regs_mask
|= 1 << PC_REGNUM
;
16797 /* Either no argument registers were pushed or a backtrace
16798 structure was created which includes an adjusted stack
16799 pointer, so just pop everything. */
16800 if (live_regs_mask
)
16801 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
16804 /* We have either just popped the return address into the
16805 PC or it is was kept in LR for the entire function. */
16806 if (!had_to_push_lr
)
16807 thumb_exit (asm_out_file
, LR_REGNUM
);
16811 /* Pop everything but the return address. */
16812 if (live_regs_mask
)
16813 thumb_pushpop (asm_out_file
, live_regs_mask
, FALSE
, NULL
,
16816 if (had_to_push_lr
)
16820 /* We have no free low regs, so save one. */
16821 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", IP_REGNUM
,
16825 /* Get the return address into a temporary register. */
16826 thumb_pushpop (asm_out_file
, 1 << LAST_ARG_REGNUM
, 0, NULL
,
16827 1 << LAST_ARG_REGNUM
);
16831 /* Move the return address to lr. */
16832 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LR_REGNUM
,
16834 /* Restore the low register. */
16835 asm_fprintf (asm_out_file
, "\tmov\t%r, %r\n", LAST_ARG_REGNUM
,
16840 regno
= LAST_ARG_REGNUM
;
16845 /* Remove the argument registers that were pushed onto the stack. */
16846 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, #%d\n",
16847 SP_REGNUM
, SP_REGNUM
,
16848 crtl
->args
.pretend_args_size
);
16850 thumb_exit (asm_out_file
, regno
);
16856 /* Functions to save and restore machine-specific function data. */
16857 static struct machine_function
*
16858 arm_init_machine_status (void)
16860 struct machine_function
*machine
;
16861 machine
= (machine_function
*) ggc_alloc_cleared (sizeof (machine_function
));
16863 #if ARM_FT_UNKNOWN != 0
16864 machine
->func_type
= ARM_FT_UNKNOWN
;
16869 /* Return an RTX indicating where the return address to the
16870 calling function can be found. */
16872 arm_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
16877 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
16880 /* Do anything needed before RTL is emitted for each function. */
16882 arm_init_expanders (void)
16884 /* Arrange to initialize and mark the machine per-function status. */
16885 init_machine_status
= arm_init_machine_status
;
16887 /* This is to stop the combine pass optimizing away the alignment
16888 adjustment of va_arg. */
16889 /* ??? It is claimed that this should not be necessary. */
16891 mark_reg_pointer (arg_pointer_rtx
, PARM_BOUNDARY
);
16895 /* Like arm_compute_initial_elimination offset. Simpler because there
16896 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16897 to point at the base of the local variables after static stack
16898 space for a function has been allocated. */
16901 thumb_compute_initial_elimination_offset (unsigned int from
, unsigned int to
)
16903 arm_stack_offsets
*offsets
;
16905 offsets
= arm_get_frame_offsets ();
16909 case ARG_POINTER_REGNUM
:
16912 case STACK_POINTER_REGNUM
:
16913 return offsets
->outgoing_args
- offsets
->saved_args
;
16915 case FRAME_POINTER_REGNUM
:
16916 return offsets
->soft_frame
- offsets
->saved_args
;
16918 case ARM_HARD_FRAME_POINTER_REGNUM
:
16919 return offsets
->saved_regs
- offsets
->saved_args
;
16921 case THUMB_HARD_FRAME_POINTER_REGNUM
:
16922 return offsets
->locals_base
- offsets
->saved_args
;
16925 gcc_unreachable ();
16929 case FRAME_POINTER_REGNUM
:
16932 case STACK_POINTER_REGNUM
:
16933 return offsets
->outgoing_args
- offsets
->soft_frame
;
16935 case ARM_HARD_FRAME_POINTER_REGNUM
:
16936 return offsets
->saved_regs
- offsets
->soft_frame
;
16938 case THUMB_HARD_FRAME_POINTER_REGNUM
:
16939 return offsets
->locals_base
- offsets
->soft_frame
;
16942 gcc_unreachable ();
16947 gcc_unreachable ();
16951 /* Generate the rest of a function's prologue. */
16953 thumb1_expand_prologue (void)
16957 HOST_WIDE_INT amount
;
16958 arm_stack_offsets
*offsets
;
16959 unsigned long func_type
;
16961 unsigned long live_regs_mask
;
16963 func_type
= arm_current_func_type ();
16965 /* Naked functions don't have prologues. */
16966 if (IS_NAKED (func_type
))
16969 if (IS_INTERRUPT (func_type
))
16971 error ("interrupt Service Routines cannot be coded in Thumb mode");
16975 offsets
= arm_get_frame_offsets ();
16976 live_regs_mask
= offsets
->saved_regs_mask
;
16977 /* Load the pic register before setting the frame pointer,
16978 so we can use r7 as a temporary work register. */
16979 if (flag_pic
&& arm_pic_register
!= INVALID_REGNUM
)
16980 arm_load_pic_register (live_regs_mask
);
16982 if (!frame_pointer_needed
&& CALLER_INTERWORKING_SLOT_SIZE
> 0)
16983 emit_move_insn (gen_rtx_REG (Pmode
, ARM_HARD_FRAME_POINTER_REGNUM
),
16984 stack_pointer_rtx
);
16986 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
16991 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
16992 GEN_INT (- amount
)));
16993 RTX_FRAME_RELATED_P (insn
) = 1;
16999 /* The stack decrement is too big for an immediate value in a single
17000 insn. In theory we could issue multiple subtracts, but after
17001 three of them it becomes more space efficient to place the full
17002 value in the constant pool and load into a register. (Also the
17003 ARM debugger really likes to see only one stack decrement per
17004 function). So instead we look for a scratch register into which
17005 we can load the decrement, and then we subtract this from the
17006 stack pointer. Unfortunately on the thumb the only available
17007 scratch registers are the argument registers, and we cannot use
17008 these as they may hold arguments to the function. Instead we
17009 attempt to locate a call preserved register which is used by this
17010 function. If we can find one, then we know that it will have
17011 been pushed at the start of the prologue and so we can corrupt
17013 for (regno
= LAST_ARG_REGNUM
+ 1; regno
<= LAST_LO_REGNUM
; regno
++)
17014 if (live_regs_mask
& (1 << regno
)
17015 && !(frame_pointer_needed
17016 && (regno
== THUMB_HARD_FRAME_POINTER_REGNUM
)))
17019 if (regno
> LAST_LO_REGNUM
) /* Very unlikely. */
17021 rtx spare
= gen_rtx_REG (SImode
, IP_REGNUM
);
17023 /* Choose an arbitrary, non-argument low register. */
17024 reg
= gen_rtx_REG (SImode
, LAST_LO_REGNUM
);
17026 /* Save it by copying it into a high, scratch register. */
17027 emit_insn (gen_movsi (spare
, reg
));
17028 /* Add a USE to stop propagate_one_insn() from barfing. */
17029 emit_insn (gen_prologue_use (spare
));
17031 /* Decrement the stack. */
17032 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
17033 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
17034 stack_pointer_rtx
, reg
));
17035 RTX_FRAME_RELATED_P (insn
) = 1;
17036 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17037 plus_constant (stack_pointer_rtx
,
17039 RTX_FRAME_RELATED_P (dwarf
) = 1;
17041 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, dwarf
,
17044 /* Restore the low register's original value. */
17045 emit_insn (gen_movsi (reg
, spare
));
17047 /* Emit a USE of the restored scratch register, so that flow
17048 analysis will not consider the restore redundant. The
17049 register won't be used again in this function and isn't
17050 restored by the epilogue. */
17051 emit_insn (gen_prologue_use (reg
));
17055 reg
= gen_rtx_REG (SImode
, regno
);
17057 emit_insn (gen_movsi (reg
, GEN_INT (- amount
)));
17059 insn
= emit_insn (gen_addsi3 (stack_pointer_rtx
,
17060 stack_pointer_rtx
, reg
));
17061 RTX_FRAME_RELATED_P (insn
) = 1;
17062 dwarf
= gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
17063 plus_constant (stack_pointer_rtx
,
17065 RTX_FRAME_RELATED_P (dwarf
) = 1;
17067 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, dwarf
,
17073 if (frame_pointer_needed
)
17074 thumb_set_frame_pointer (offsets
);
17076 /* If we are profiling, make sure no instructions are scheduled before
17077 the call to mcount. Similarly if the user has requested no
17078 scheduling in the prolog. Similarly if we want non-call exceptions
17079 using the EABI unwinder, to prevent faulting instructions from being
17080 swapped with a stack adjustment. */
17081 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
17082 || (ARM_EABI_UNWIND_TABLES
&& flag_non_call_exceptions
))
17083 emit_insn (gen_blockage ());
17085 cfun
->machine
->lr_save_eliminated
= !thumb_force_lr_save ();
17086 if (live_regs_mask
& 0xff)
17087 cfun
->machine
->lr_save_eliminated
= 0;
17092 thumb1_expand_epilogue (void)
17094 HOST_WIDE_INT amount
;
17095 arm_stack_offsets
*offsets
;
17098 /* Naked functions don't have prologues. */
17099 if (IS_NAKED (arm_current_func_type ()))
17102 offsets
= arm_get_frame_offsets ();
17103 amount
= offsets
->outgoing_args
- offsets
->saved_regs
;
17105 if (frame_pointer_needed
)
17107 emit_insn (gen_movsi (stack_pointer_rtx
, hard_frame_pointer_rtx
));
17108 amount
= offsets
->locals_base
- offsets
->saved_regs
;
17111 gcc_assert (amount
>= 0);
17115 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
,
17116 GEN_INT (amount
)));
17119 /* r3 is always free in the epilogue. */
17120 rtx reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
17122 emit_insn (gen_movsi (reg
, GEN_INT (amount
)));
17123 emit_insn (gen_addsi3 (stack_pointer_rtx
, stack_pointer_rtx
, reg
));
17127 /* Emit a USE (stack_pointer_rtx), so that
17128 the stack adjustment will not be deleted. */
17129 emit_insn (gen_prologue_use (stack_pointer_rtx
));
17131 if (crtl
->profile
|| !TARGET_SCHED_PROLOG
)
17132 emit_insn (gen_blockage ());
17134 /* Emit a clobber for each insn that will be restored in the epilogue,
17135 so that flow2 will get register lifetimes correct. */
17136 for (regno
= 0; regno
< 13; regno
++)
17137 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
17138 emit_clobber (gen_rtx_REG (SImode
, regno
));
17140 if (! df_regs_ever_live_p (LR_REGNUM
))
17141 emit_use (gen_rtx_REG (SImode
, LR_REGNUM
));
17145 thumb1_output_function_prologue (FILE *f
, HOST_WIDE_INT size ATTRIBUTE_UNUSED
)
17147 arm_stack_offsets
*offsets
;
17148 unsigned long live_regs_mask
= 0;
17149 unsigned long l_mask
;
17150 unsigned high_regs_pushed
= 0;
17151 int cfa_offset
= 0;
17154 if (IS_NAKED (arm_current_func_type ()))
17157 if (is_called_in_ARM_mode (current_function_decl
))
17161 gcc_assert (GET_CODE (DECL_RTL (current_function_decl
)) == MEM
);
17162 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl
), 0))
17164 name
= XSTR (XEXP (DECL_RTL (current_function_decl
), 0), 0);
17166 /* Generate code sequence to switch us into Thumb mode. */
17167 /* The .code 32 directive has already been emitted by
17168 ASM_DECLARE_FUNCTION_NAME. */
17169 asm_fprintf (f
, "\torr\t%r, %r, #1\n", IP_REGNUM
, PC_REGNUM
);
17170 asm_fprintf (f
, "\tbx\t%r\n", IP_REGNUM
);
17172 /* Generate a label, so that the debugger will notice the
17173 change in instruction sets. This label is also used by
17174 the assembler to bypass the ARM code when this function
17175 is called from a Thumb encoded function elsewhere in the
17176 same file. Hence the definition of STUB_NAME here must
17177 agree with the definition in gas/config/tc-arm.c. */
17179 #define STUB_NAME ".real_start_of"
17181 fprintf (f
, "\t.code\t16\n");
17183 if (arm_dllexport_name_p (name
))
17184 name
= arm_strip_name_encoding (name
);
17186 asm_fprintf (f
, "\t.globl %s%U%s\n", STUB_NAME
, name
);
17187 fprintf (f
, "\t.thumb_func\n");
17188 asm_fprintf (f
, "%s%U%s:\n", STUB_NAME
, name
);
17191 if (crtl
->args
.pretend_args_size
)
17193 /* Output unwind directive for the stack adjustment. */
17194 if (ARM_EABI_UNWIND_TABLES
)
17195 fprintf (f
, "\t.pad #%d\n",
17196 crtl
->args
.pretend_args_size
);
17198 if (cfun
->machine
->uses_anonymous_args
)
17202 fprintf (f
, "\tpush\t{");
17204 num_pushes
= ARM_NUM_INTS (crtl
->args
.pretend_args_size
);
17206 for (regno
= LAST_ARG_REGNUM
+ 1 - num_pushes
;
17207 regno
<= LAST_ARG_REGNUM
;
17209 asm_fprintf (f
, "%r%s", regno
,
17210 regno
== LAST_ARG_REGNUM
? "" : ", ");
17212 fprintf (f
, "}\n");
17215 asm_fprintf (f
, "\tsub\t%r, %r, #%d\n",
17216 SP_REGNUM
, SP_REGNUM
,
17217 crtl
->args
.pretend_args_size
);
17219 /* We don't need to record the stores for unwinding (would it
17220 help the debugger any if we did?), but record the change in
17221 the stack pointer. */
17222 if (dwarf2out_do_frame ())
17224 char *l
= dwarf2out_cfi_label ();
17226 cfa_offset
= cfa_offset
+ crtl
->args
.pretend_args_size
;
17227 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
17231 /* Get the registers we are going to push. */
17232 offsets
= arm_get_frame_offsets ();
17233 live_regs_mask
= offsets
->saved_regs_mask
;
17234 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17235 l_mask
= live_regs_mask
& 0x40ff;
17236 /* Then count how many other high registers will need to be pushed. */
17237 high_regs_pushed
= bit_count (live_regs_mask
& 0x0f00);
17239 if (TARGET_BACKTRACE
)
17242 unsigned work_register
;
17244 /* We have been asked to create a stack backtrace structure.
17245 The code looks like this:
17249 0 sub SP, #16 Reserve space for 4 registers.
17250 2 push {R7} Push low registers.
17251 4 add R7, SP, #20 Get the stack pointer before the push.
17252 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17253 8 mov R7, PC Get hold of the start of this code plus 12.
17254 10 str R7, [SP, #16] Store it.
17255 12 mov R7, FP Get hold of the current frame pointer.
17256 14 str R7, [SP, #4] Store it.
17257 16 mov R7, LR Get hold of the current return address.
17258 18 str R7, [SP, #12] Store it.
17259 20 add R7, SP, #16 Point at the start of the backtrace structure.
17260 22 mov FP, R7 Put this value into the frame pointer. */
17262 work_register
= thumb_find_work_register (live_regs_mask
);
17264 if (ARM_EABI_UNWIND_TABLES
)
17265 asm_fprintf (f
, "\t.pad #16\n");
17268 (f
, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17269 SP_REGNUM
, SP_REGNUM
);
17271 if (dwarf2out_do_frame ())
17273 char *l
= dwarf2out_cfi_label ();
17275 cfa_offset
= cfa_offset
+ 16;
17276 dwarf2out_def_cfa (l
, SP_REGNUM
, cfa_offset
);
17281 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
17282 offset
= bit_count (l_mask
) * UNITS_PER_WORD
;
17287 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
17288 offset
+ 16 + crtl
->args
.pretend_args_size
);
17290 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
17293 /* Make sure that the instruction fetching the PC is in the right place
17294 to calculate "start of backtrace creation code + 12". */
17297 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
17298 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
17300 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
17301 ARM_HARD_FRAME_POINTER_REGNUM
);
17302 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
17307 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
,
17308 ARM_HARD_FRAME_POINTER_REGNUM
);
17309 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
17311 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, PC_REGNUM
);
17312 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
17316 asm_fprintf (f
, "\tmov\t%r, %r\n", work_register
, LR_REGNUM
);
17317 asm_fprintf (f
, "\tstr\t%r, [%r, #%d]\n", work_register
, SP_REGNUM
,
17319 asm_fprintf (f
, "\tadd\t%r, %r, #%d\n", work_register
, SP_REGNUM
,
17321 asm_fprintf (f
, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17322 ARM_HARD_FRAME_POINTER_REGNUM
, work_register
);
17324 /* Optimization: If we are not pushing any low registers but we are going
17325 to push some high registers then delay our first push. This will just
17326 be a push of LR and we can combine it with the push of the first high
17328 else if ((l_mask
& 0xff) != 0
17329 || (high_regs_pushed
== 0 && l_mask
))
17330 thumb_pushpop (f
, l_mask
, 1, &cfa_offset
, l_mask
);
17332 if (high_regs_pushed
)
17334 unsigned pushable_regs
;
17335 unsigned next_hi_reg
;
17337 for (next_hi_reg
= 12; next_hi_reg
> LAST_LO_REGNUM
; next_hi_reg
--)
17338 if (live_regs_mask
& (1 << next_hi_reg
))
17341 pushable_regs
= l_mask
& 0xff;
17343 if (pushable_regs
== 0)
17344 pushable_regs
= 1 << thumb_find_work_register (live_regs_mask
);
17346 while (high_regs_pushed
> 0)
17348 unsigned long real_regs_mask
= 0;
17350 for (regno
= LAST_LO_REGNUM
; regno
>= 0; regno
--)
17352 if (pushable_regs
& (1 << regno
))
17354 asm_fprintf (f
, "\tmov\t%r, %r\n", regno
, next_hi_reg
);
17356 high_regs_pushed
--;
17357 real_regs_mask
|= (1 << next_hi_reg
);
17359 if (high_regs_pushed
)
17361 for (next_hi_reg
--; next_hi_reg
> LAST_LO_REGNUM
;
17363 if (live_regs_mask
& (1 << next_hi_reg
))
17368 pushable_regs
&= ~((1 << regno
) - 1);
17374 /* If we had to find a work register and we have not yet
17375 saved the LR then add it to the list of regs to push. */
17376 if (l_mask
== (1 << LR_REGNUM
))
17378 thumb_pushpop (f
, pushable_regs
| (1 << LR_REGNUM
),
17380 real_regs_mask
| (1 << LR_REGNUM
));
17384 thumb_pushpop (f
, pushable_regs
, 1, &cfa_offset
, real_regs_mask
);
17389 /* Handle the case of a double word load into a low register from
17390 a computed memory address. The computed address may involve a
17391 register which is overwritten by the load. */
17393 thumb_load_double_from_address (rtx
*operands
)
17401 gcc_assert (GET_CODE (operands
[0]) == REG
);
17402 gcc_assert (GET_CODE (operands
[1]) == MEM
);
17404 /* Get the memory address. */
17405 addr
= XEXP (operands
[1], 0);
17407 /* Work out how the memory address is computed. */
17408 switch (GET_CODE (addr
))
17411 operands
[2] = adjust_address (operands
[1], SImode
, 4);
17413 if (REGNO (operands
[0]) == REGNO (addr
))
17415 output_asm_insn ("ldr\t%H0, %2", operands
);
17416 output_asm_insn ("ldr\t%0, %1", operands
);
17420 output_asm_insn ("ldr\t%0, %1", operands
);
17421 output_asm_insn ("ldr\t%H0, %2", operands
);
17426 /* Compute <address> + 4 for the high order load. */
17427 operands
[2] = adjust_address (operands
[1], SImode
, 4);
17429 output_asm_insn ("ldr\t%0, %1", operands
);
17430 output_asm_insn ("ldr\t%H0, %2", operands
);
17434 arg1
= XEXP (addr
, 0);
17435 arg2
= XEXP (addr
, 1);
17437 if (CONSTANT_P (arg1
))
17438 base
= arg2
, offset
= arg1
;
17440 base
= arg1
, offset
= arg2
;
17442 gcc_assert (GET_CODE (base
) == REG
);
17444 /* Catch the case of <address> = <reg> + <reg> */
17445 if (GET_CODE (offset
) == REG
)
17447 int reg_offset
= REGNO (offset
);
17448 int reg_base
= REGNO (base
);
17449 int reg_dest
= REGNO (operands
[0]);
17451 /* Add the base and offset registers together into the
17452 higher destination register. */
17453 asm_fprintf (asm_out_file
, "\tadd\t%r, %r, %r",
17454 reg_dest
+ 1, reg_base
, reg_offset
);
17456 /* Load the lower destination register from the address in
17457 the higher destination register. */
17458 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #0]",
17459 reg_dest
, reg_dest
+ 1);
17461 /* Load the higher destination register from its own address
17463 asm_fprintf (asm_out_file
, "\tldr\t%r, [%r, #4]",
17464 reg_dest
+ 1, reg_dest
+ 1);
17468 /* Compute <address> + 4 for the high order load. */
17469 operands
[2] = adjust_address (operands
[1], SImode
, 4);
17471 /* If the computed address is held in the low order register
17472 then load the high order register first, otherwise always
17473 load the low order register first. */
17474 if (REGNO (operands
[0]) == REGNO (base
))
17476 output_asm_insn ("ldr\t%H0, %2", operands
);
17477 output_asm_insn ("ldr\t%0, %1", operands
);
17481 output_asm_insn ("ldr\t%0, %1", operands
);
17482 output_asm_insn ("ldr\t%H0, %2", operands
);
17488 /* With no registers to worry about we can just load the value
17490 operands
[2] = adjust_address (operands
[1], SImode
, 4);
17492 output_asm_insn ("ldr\t%H0, %2", operands
);
17493 output_asm_insn ("ldr\t%0, %1", operands
);
17497 gcc_unreachable ();
17504 thumb_output_move_mem_multiple (int n
, rtx
*operands
)
17511 if (REGNO (operands
[4]) > REGNO (operands
[5]))
17514 operands
[4] = operands
[5];
17517 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands
);
17518 output_asm_insn ("stmia\t%0!, {%4, %5}", operands
);
17522 if (REGNO (operands
[4]) > REGNO (operands
[5]))
17525 operands
[4] = operands
[5];
17528 if (REGNO (operands
[5]) > REGNO (operands
[6]))
17531 operands
[5] = operands
[6];
17534 if (REGNO (operands
[4]) > REGNO (operands
[5]))
17537 operands
[4] = operands
[5];
17541 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands
);
17542 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands
);
17546 gcc_unreachable ();
17552 /* Output a call-via instruction for thumb state. */
17554 thumb_call_via_reg (rtx reg
)
17556 int regno
= REGNO (reg
);
17559 gcc_assert (regno
< LR_REGNUM
);
17561 /* If we are in the normal text section we can use a single instance
17562 per compilation unit. If we are doing function sections, then we need
17563 an entry per section, since we can't rely on reachability. */
17564 if (in_section
== text_section
)
17566 thumb_call_reg_needed
= 1;
17568 if (thumb_call_via_label
[regno
] == NULL
)
17569 thumb_call_via_label
[regno
] = gen_label_rtx ();
17570 labelp
= thumb_call_via_label
+ regno
;
17574 if (cfun
->machine
->call_via
[regno
] == NULL
)
17575 cfun
->machine
->call_via
[regno
] = gen_label_rtx ();
17576 labelp
= cfun
->machine
->call_via
+ regno
;
17579 output_asm_insn ("bl\t%a0", labelp
);
17583 /* Routines for generating rtl. */
17585 thumb_expand_movmemqi (rtx
*operands
)
17587 rtx out
= copy_to_mode_reg (SImode
, XEXP (operands
[0], 0));
17588 rtx in
= copy_to_mode_reg (SImode
, XEXP (operands
[1], 0));
17589 HOST_WIDE_INT len
= INTVAL (operands
[2]);
17590 HOST_WIDE_INT offset
= 0;
17594 emit_insn (gen_movmem12b (out
, in
, out
, in
));
17600 emit_insn (gen_movmem8b (out
, in
, out
, in
));
17606 rtx reg
= gen_reg_rtx (SImode
);
17607 emit_insn (gen_movsi (reg
, gen_rtx_MEM (SImode
, in
)));
17608 emit_insn (gen_movsi (gen_rtx_MEM (SImode
, out
), reg
));
17615 rtx reg
= gen_reg_rtx (HImode
);
17616 emit_insn (gen_movhi (reg
, gen_rtx_MEM (HImode
,
17617 plus_constant (in
, offset
))));
17618 emit_insn (gen_movhi (gen_rtx_MEM (HImode
, plus_constant (out
, offset
)),
17626 rtx reg
= gen_reg_rtx (QImode
);
17627 emit_insn (gen_movqi (reg
, gen_rtx_MEM (QImode
,
17628 plus_constant (in
, offset
))));
17629 emit_insn (gen_movqi (gen_rtx_MEM (QImode
, plus_constant (out
, offset
)),
17635 thumb_reload_out_hi (rtx
*operands
)
17637 emit_insn (gen_thumb_movhi_clobber (operands
[0], operands
[1], operands
[2]));
17640 /* Handle reading a half-word from memory during reload. */
17642 thumb_reload_in_hi (rtx
*operands ATTRIBUTE_UNUSED
)
17644 gcc_unreachable ();
17647 /* Return the length of a function name prefix
17648 that starts with the character 'c'. */
17650 arm_get_strip_length (int c
)
17654 ARM_NAME_ENCODING_LENGTHS
/* Return a pointer to a function's name with any
   and all prefix encodings stripped from it.  */
const char *
arm_strip_name_encoding (const char *name)
{
  int skip;

  while ((skip = arm_get_strip_length (* name)))
    name += skip;

  return name;
}
/* If there is a '*' anywhere in the name's prefix, then
   emit the stripped name verbatim, otherwise prepend an
   underscore if leading underscores are being used.  */
void
arm_asm_output_labelref (FILE *stream, const char *name)
{
  int skip;
  int verbatim = 0;

  while ((skip = arm_get_strip_length (* name)))
    {
      verbatim |= (*name == '*');
      name += skip;
    }

  if (verbatim)
    fputs (name, stream);
  else
    asm_fprintf (stream, "%U%s", name);
}
17694 arm_file_start (void)
17698 if (TARGET_UNIFIED_ASM
)
17699 asm_fprintf (asm_out_file
, "\t.syntax unified\n");
17703 const char *fpu_name
;
17704 if (arm_select
[0].string
)
17705 asm_fprintf (asm_out_file
, "\t.cpu %s\n", arm_select
[0].string
);
17706 else if (arm_select
[1].string
)
17707 asm_fprintf (asm_out_file
, "\t.arch %s\n", arm_select
[1].string
);
17709 asm_fprintf (asm_out_file
, "\t.cpu %s\n",
17710 all_cores
[arm_default_cpu
].name
);
17712 if (TARGET_SOFT_FLOAT
)
17715 fpu_name
= "softvfp";
17717 fpu_name
= "softfpa";
17721 int set_float_abi_attributes
= 0;
17722 switch (arm_fpu_arch
)
17727 case FPUTYPE_FPA_EMU2
:
17730 case FPUTYPE_FPA_EMU3
:
17733 case FPUTYPE_MAVERICK
:
17734 fpu_name
= "maverick";
17738 set_float_abi_attributes
= 1;
17742 set_float_abi_attributes
= 1;
17746 set_float_abi_attributes
= 1;
17751 if (set_float_abi_attributes
)
17753 if (TARGET_HARD_FLOAT
)
17754 asm_fprintf (asm_out_file
, "\t.eabi_attribute 27, 3\n");
17755 if (TARGET_HARD_FLOAT_ABI
)
17756 asm_fprintf (asm_out_file
, "\t.eabi_attribute 28, 1\n");
17759 asm_fprintf (asm_out_file
, "\t.fpu %s\n", fpu_name
);
17761 /* Some of these attributes only apply when the corresponding features
17762 are used. However we don't have any easy way of figuring this out.
17763 Conservatively record the setting that would have been used. */
17765 /* Tag_ABI_FP_rounding. */
17766 if (flag_rounding_math
)
17767 asm_fprintf (asm_out_file
, "\t.eabi_attribute 19, 1\n");
17768 if (!flag_unsafe_math_optimizations
)
17770 /* Tag_ABI_FP_denomal. */
17771 asm_fprintf (asm_out_file
, "\t.eabi_attribute 20, 1\n");
17772 /* Tag_ABI_FP_exceptions. */
17773 asm_fprintf (asm_out_file
, "\t.eabi_attribute 21, 1\n");
17775 /* Tag_ABI_FP_user_exceptions. */
17776 if (flag_signaling_nans
)
17777 asm_fprintf (asm_out_file
, "\t.eabi_attribute 22, 1\n");
17778 /* Tag_ABI_FP_number_model. */
17779 asm_fprintf (asm_out_file
, "\t.eabi_attribute 23, %d\n",
17780 flag_finite_math_only
? 1 : 3);
17782 /* Tag_ABI_align8_needed. */
17783 asm_fprintf (asm_out_file
, "\t.eabi_attribute 24, 1\n");
17784 /* Tag_ABI_align8_preserved. */
17785 asm_fprintf (asm_out_file
, "\t.eabi_attribute 25, 1\n");
17786 /* Tag_ABI_enum_size. */
17787 asm_fprintf (asm_out_file
, "\t.eabi_attribute 26, %d\n",
17788 flag_short_enums
? 1 : 2);
17790 /* Tag_ABI_optimization_goals. */
17793 else if (optimize
>= 2)
17799 asm_fprintf (asm_out_file
, "\t.eabi_attribute 30, %d\n", val
);
17801 if (arm_lang_output_object_attributes_hook
)
17802 arm_lang_output_object_attributes_hook();
17804 default_file_start();
17808 arm_file_end (void)
17812 if (NEED_INDICATE_EXEC_STACK
)
17813 /* Add .note.GNU-stack. */
17814 file_end_indicate_exec_stack ();
17816 if (! thumb_call_reg_needed
)
17819 switch_to_section (text_section
);
17820 asm_fprintf (asm_out_file
, "\t.code 16\n");
17821 ASM_OUTPUT_ALIGN (asm_out_file
, 1);
17823 for (regno
= 0; regno
< LR_REGNUM
; regno
++)
17825 rtx label
= thumb_call_via_label
[regno
];
17829 targetm
.asm_out
.internal_label (asm_out_file
, "L",
17830 CODE_LABEL_NUMBER (label
));
17831 asm_fprintf (asm_out_file
, "\tbx\t%r\n", regno
);
17837 /* Symbols in the text segment can be accessed without indirecting via the
17838 constant pool; it may take an extra binary operation, but this is still
17839 faster than indirecting via memory. Don't do this when not optimizing,
17840 since we won't be calculating al of the offsets necessary to do this
17844 arm_encode_section_info (tree decl
, rtx rtl
, int first
)
17846 if (optimize
> 0 && TREE_CONSTANT (decl
))
17847 SYMBOL_REF_FLAG (XEXP (rtl
, 0)) = 1;
17849 default_encode_section_info (decl
, rtl
, first
);
17851 #endif /* !ARM_PE */
17854 arm_internal_label (FILE *stream
, const char *prefix
, unsigned long labelno
)
17856 if (arm_ccfsm_state
== 3 && (unsigned) arm_target_label
== labelno
17857 && !strcmp (prefix
, "L"))
17859 arm_ccfsm_state
= 0;
17860 arm_target_insn
= NULL
;
17862 default_internal_label (stream
, prefix
, labelno
);
17865 /* Output code to add DELTA to the first argument, and then jump
17866 to FUNCTION. Used for C++ multiple inheritance. */
17868 arm_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
17869 HOST_WIDE_INT delta
,
17870 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED
,
17873 static int thunk_label
= 0;
17876 int mi_delta
= delta
;
17877 const char *const mi_op
= mi_delta
< 0 ? "sub" : "add";
17879 int this_regno
= (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
)
17882 mi_delta
= - mi_delta
;
17886 int labelno
= thunk_label
++;
17887 ASM_GENERATE_INTERNAL_LABEL (label
, "LTHUMBFUNC", labelno
);
17888 /* Thunks are entered in arm mode when avaiable. */
17889 if (TARGET_THUMB1_ONLY
)
17891 /* push r3 so we can use it as a temporary. */
17892 /* TODO: Omit this save if r3 is not used. */
17893 fputs ("\tpush {r3}\n", file
);
17894 fputs ("\tldr\tr3, ", file
);
17898 fputs ("\tldr\tr12, ", file
);
17900 assemble_name (file
, label
);
17901 fputc ('\n', file
);
17904 /* If we are generating PIC, the ldr instruction below loads
17905 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17906 the address of the add + 8, so we have:
17908 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17911 Note that we have "+ 1" because some versions of GNU ld
17912 don't set the low bit of the result for R_ARM_REL32
17913 relocations against thumb function symbols.
17914 On ARMv6M this is +4, not +8. */
17915 ASM_GENERATE_INTERNAL_LABEL (labelpc
, "LTHUNKPC", labelno
);
17916 assemble_name (file
, labelpc
);
17917 fputs (":\n", file
);
17918 if (TARGET_THUMB1_ONLY
)
17920 /* This is 2 insns after the start of the thunk, so we know it
17921 is 4-byte aligned. */
17922 fputs ("\tadd\tr3, pc, r3\n", file
);
17923 fputs ("\tmov r12, r3\n", file
);
17926 fputs ("\tadd\tr12, pc, r12\n", file
);
17928 else if (TARGET_THUMB1_ONLY
)
17929 fputs ("\tmov r12, r3\n", file
);
17931 if (TARGET_THUMB1_ONLY
)
17933 if (mi_delta
> 255)
17935 fputs ("\tldr\tr3, ", file
);
17936 assemble_name (file
, label
);
17937 fputs ("+4\n", file
);
17938 asm_fprintf (file
, "\t%s\t%r, %r, r3\n",
17939 mi_op
, this_regno
, this_regno
);
17941 else if (mi_delta
!= 0)
17943 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
17944 mi_op
, this_regno
, this_regno
,
17950 /* TODO: Use movw/movt for large constants when available. */
17951 while (mi_delta
!= 0)
17953 if ((mi_delta
& (3 << shift
)) == 0)
17957 asm_fprintf (file
, "\t%s\t%r, %r, #%d\n",
17958 mi_op
, this_regno
, this_regno
,
17959 mi_delta
& (0xff << shift
));
17960 mi_delta
&= ~(0xff << shift
);
17967 if (TARGET_THUMB1_ONLY
)
17968 fputs ("\tpop\t{r3}\n", file
);
17970 fprintf (file
, "\tbx\tr12\n");
17971 ASM_OUTPUT_ALIGN (file
, 2);
17972 assemble_name (file
, label
);
17973 fputs (":\n", file
);
17976 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17977 rtx tem
= XEXP (DECL_RTL (function
), 0);
17978 tem
= gen_rtx_PLUS (GET_MODE (tem
), tem
, GEN_INT (-7));
17979 tem
= gen_rtx_MINUS (GET_MODE (tem
),
17981 gen_rtx_SYMBOL_REF (Pmode
,
17982 ggc_strdup (labelpc
)));
17983 assemble_integer (tem
, 4, BITS_PER_WORD
, 1);
17986 /* Output ".word .LTHUNKn". */
17987 assemble_integer (XEXP (DECL_RTL (function
), 0), 4, BITS_PER_WORD
, 1);
17989 if (TARGET_THUMB1_ONLY
&& mi_delta
> 255)
17990 assemble_integer (GEN_INT(mi_delta
), 4, BITS_PER_WORD
, 1);
17994 fputs ("\tb\t", file
);
17995 assemble_name (file
, XSTR (XEXP (DECL_RTL (function
), 0), 0));
17996 if (NEED_PLT_RELOC
)
17997 fputs ("(PLT)", file
);
17998 fputc ('\n', file
);
18003 arm_emit_vector_const (FILE *file
, rtx x
)
18006 const char * pattern
;
18008 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
18010 switch (GET_MODE (x
))
18012 case V2SImode
: pattern
= "%08x"; break;
18013 case V4HImode
: pattern
= "%04x"; break;
18014 case V8QImode
: pattern
= "%02x"; break;
18015 default: gcc_unreachable ();
18018 fprintf (file
, "0x");
18019 for (i
= CONST_VECTOR_NUNITS (x
); i
--;)
18023 element
= CONST_VECTOR_ELT (x
, i
);
18024 fprintf (file
, pattern
, INTVAL (element
));
18031 arm_output_load_gr (rtx
*operands
)
18038 if (GET_CODE (operands
[1]) != MEM
18039 || GET_CODE (sum
= XEXP (operands
[1], 0)) != PLUS
18040 || GET_CODE (reg
= XEXP (sum
, 0)) != REG
18041 || GET_CODE (offset
= XEXP (sum
, 1)) != CONST_INT
18042 || ((INTVAL (offset
) < 1024) && (INTVAL (offset
) > -1024)))
18043 return "wldrw%?\t%0, %1";
18045 /* Fix up an out-of-range load of a GR register. */
18046 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg
);
18047 wcgr
= operands
[0];
18049 output_asm_insn ("ldr%?\t%0, %1", operands
);
18051 operands
[0] = wcgr
;
18053 output_asm_insn ("tmcr%?\t%0, %1", operands
);
18054 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg
);
18059 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
18061 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
18062 named arg and all anonymous args onto the stack.
18063 XXX I know the prologue shouldn't be pushing registers, but it is faster
18067 arm_setup_incoming_varargs (CUMULATIVE_ARGS
*cum
,
18068 enum machine_mode mode
,
18071 int second_time ATTRIBUTE_UNUSED
)
18073 int nregs
= cum
->nregs
;
18075 && ARM_DOUBLEWORD_ALIGN
18076 && arm_needs_doubleword_align (mode
, type
))
18079 cfun
->machine
->uses_anonymous_args
= 1;
18080 if (nregs
< NUM_ARG_REGS
)
18081 *pretend_size
= (NUM_ARG_REGS
- nregs
) * UNITS_PER_WORD
;
18084 /* Return nonzero if the CONSUMER instruction (a store) does not need
18085 PRODUCER's value to calculate the address. */
18088 arm_no_early_store_addr_dep (rtx producer
, rtx consumer
)
18090 rtx value
= PATTERN (producer
);
18091 rtx addr
= PATTERN (consumer
);
18093 if (GET_CODE (value
) == COND_EXEC
)
18094 value
= COND_EXEC_CODE (value
);
18095 if (GET_CODE (value
) == PARALLEL
)
18096 value
= XVECEXP (value
, 0, 0);
18097 value
= XEXP (value
, 0);
18098 if (GET_CODE (addr
) == COND_EXEC
)
18099 addr
= COND_EXEC_CODE (addr
);
18100 if (GET_CODE (addr
) == PARALLEL
)
18101 addr
= XVECEXP (addr
, 0, 0);
18102 addr
= XEXP (addr
, 0);
18104 return !reg_overlap_mentioned_p (value
, addr
);
18107 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18108 have an early register shift value or amount dependency on the
18109 result of PRODUCER. */
18112 arm_no_early_alu_shift_dep (rtx producer
, rtx consumer
)
18114 rtx value
= PATTERN (producer
);
18115 rtx op
= PATTERN (consumer
);
18118 if (GET_CODE (value
) == COND_EXEC
)
18119 value
= COND_EXEC_CODE (value
);
18120 if (GET_CODE (value
) == PARALLEL
)
18121 value
= XVECEXP (value
, 0, 0);
18122 value
= XEXP (value
, 0);
18123 if (GET_CODE (op
) == COND_EXEC
)
18124 op
= COND_EXEC_CODE (op
);
18125 if (GET_CODE (op
) == PARALLEL
)
18126 op
= XVECEXP (op
, 0, 0);
18129 early_op
= XEXP (op
, 0);
18130 /* This is either an actual independent shift, or a shift applied to
18131 the first operand of another operation. We want the whole shift
18133 if (GET_CODE (early_op
) == REG
)
18136 return !reg_overlap_mentioned_p (value
, early_op
);
18139 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18140 have an early register shift value dependency on the result of
18144 arm_no_early_alu_shift_value_dep (rtx producer
, rtx consumer
)
18146 rtx value
= PATTERN (producer
);
18147 rtx op
= PATTERN (consumer
);
18150 if (GET_CODE (value
) == COND_EXEC
)
18151 value
= COND_EXEC_CODE (value
);
18152 if (GET_CODE (value
) == PARALLEL
)
18153 value
= XVECEXP (value
, 0, 0);
18154 value
= XEXP (value
, 0);
18155 if (GET_CODE (op
) == COND_EXEC
)
18156 op
= COND_EXEC_CODE (op
);
18157 if (GET_CODE (op
) == PARALLEL
)
18158 op
= XVECEXP (op
, 0, 0);
18161 early_op
= XEXP (op
, 0);
18163 /* This is either an actual independent shift, or a shift applied to
18164 the first operand of another operation. We want the value being
18165 shifted, in either case. */
18166 if (GET_CODE (early_op
) != REG
)
18167 early_op
= XEXP (early_op
, 0);
18169 return !reg_overlap_mentioned_p (value
, early_op
);
18172 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18173 have an early register mult dependency on the result of
18177 arm_no_early_mul_dep (rtx producer
, rtx consumer
)
18179 rtx value
= PATTERN (producer
);
18180 rtx op
= PATTERN (consumer
);
18182 if (GET_CODE (value
) == COND_EXEC
)
18183 value
= COND_EXEC_CODE (value
);
18184 if (GET_CODE (value
) == PARALLEL
)
18185 value
= XVECEXP (value
, 0, 0);
18186 value
= XEXP (value
, 0);
18187 if (GET_CODE (op
) == COND_EXEC
)
18188 op
= COND_EXEC_CODE (op
);
18189 if (GET_CODE (op
) == PARALLEL
)
18190 op
= XVECEXP (op
, 0, 0);
18193 return (GET_CODE (op
) == PLUS
18194 && !reg_overlap_mentioned_p (value
, XEXP (op
, 0)));
18197 /* We can't rely on the caller doing the proper promotion when
18198 using APCS or ATPCS. */
18201 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED
)
18203 return !TARGET_AAPCS_BASED
;
18207 /* AAPCS based ABIs use short enums by default. */
18210 arm_default_short_enums (void)
18212 return TARGET_AAPCS_BASED
&& arm_abi
!= ARM_ABI_AAPCS_LINUX
;
18216 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18219 arm_align_anon_bitfield (void)
18221 return TARGET_AAPCS_BASED
;
18225 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18228 arm_cxx_guard_type (void)
18230 return TARGET_AAPCS_BASED
? integer_type_node
: long_long_integer_type_node
;
18233 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18234 has an accumulator dependency on the result of the producer (a
18235 multiplication instruction) and no other dependency on that result. */
18237 arm_mac_accumulator_is_mul_result (rtx producer
, rtx consumer
)
18239 rtx mul
= PATTERN (producer
);
18240 rtx mac
= PATTERN (consumer
);
18242 rtx mac_op0
, mac_op1
, mac_acc
;
18244 if (GET_CODE (mul
) == COND_EXEC
)
18245 mul
= COND_EXEC_CODE (mul
);
18246 if (GET_CODE (mac
) == COND_EXEC
)
18247 mac
= COND_EXEC_CODE (mac
);
18249 /* Check that mul is of the form (set (...) (mult ...))
18250 and mla is of the form (set (...) (plus (mult ...) (...))). */
18251 if ((GET_CODE (mul
) != SET
|| GET_CODE (XEXP (mul
, 1)) != MULT
)
18252 || (GET_CODE (mac
) != SET
|| GET_CODE (XEXP (mac
, 1)) != PLUS
18253 || GET_CODE (XEXP (XEXP (mac
, 1), 0)) != MULT
))
18256 mul_result
= XEXP (mul
, 0);
18257 mac_op0
= XEXP (XEXP (XEXP (mac
, 1), 0), 0);
18258 mac_op1
= XEXP (XEXP (XEXP (mac
, 1), 0), 1);
18259 mac_acc
= XEXP (XEXP (mac
, 1), 1);
18261 return (reg_overlap_mentioned_p (mul_result
, mac_acc
)
18262 && !reg_overlap_mentioned_p (mul_result
, mac_op0
)
18263 && !reg_overlap_mentioned_p (mul_result
, mac_op1
));
18267 /* The EABI says test the least significant bit of a guard variable. */
18270 arm_cxx_guard_mask_bit (void)
18272 return TARGET_AAPCS_BASED
;
18276 /* The EABI specifies that all array cookies are 8 bytes long. */
18279 arm_get_cookie_size (tree type
)
18283 if (!TARGET_AAPCS_BASED
)
18284 return default_cxx_get_cookie_size (type
);
18286 size
= build_int_cst (sizetype
, 8);
18291 /* The EABI says that array cookies should also contain the element size. */
18294 arm_cookie_has_size (void)
18296 return TARGET_AAPCS_BASED
;
18300 /* The EABI says constructors and destructors should return a pointer to
18301 the object constructed/destroyed. */
18304 arm_cxx_cdtor_returns_this (void)
18306 return TARGET_AAPCS_BASED
;
18309 /* The EABI says that an inline function may never be the key
18313 arm_cxx_key_method_may_be_inline (void)
18315 return !TARGET_AAPCS_BASED
;
18319 arm_cxx_determine_class_data_visibility (tree decl
)
18321 if (!TARGET_AAPCS_BASED
18322 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES
)
18325 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18326 is exported. However, on systems without dynamic vague linkage,
18327 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18328 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P
&& DECL_COMDAT (decl
))
18329 DECL_VISIBILITY (decl
) = VISIBILITY_HIDDEN
;
18331 DECL_VISIBILITY (decl
) = VISIBILITY_DEFAULT
;
18332 DECL_VISIBILITY_SPECIFIED (decl
) = 1;
18336 arm_cxx_class_data_always_comdat (void)
18338 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18339 vague linkage if the class has no key function. */
18340 return !TARGET_AAPCS_BASED
;
18344 /* The EABI says __aeabi_atexit should be used to register static
18348 arm_cxx_use_aeabi_atexit (void)
18350 return TARGET_AAPCS_BASED
;
18355 arm_set_return_address (rtx source
, rtx scratch
)
18357 arm_stack_offsets
*offsets
;
18358 HOST_WIDE_INT delta
;
18360 unsigned long saved_regs
;
18362 offsets
= arm_get_frame_offsets ();
18363 saved_regs
= offsets
->saved_regs_mask
;
18365 if ((saved_regs
& (1 << LR_REGNUM
)) == 0)
18366 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
18369 if (frame_pointer_needed
)
18370 addr
= plus_constant(hard_frame_pointer_rtx
, -4);
18373 /* LR will be the first saved register. */
18374 delta
= offsets
->outgoing_args
- (offsets
->frame
+ 4);
18379 emit_insn (gen_addsi3 (scratch
, stack_pointer_rtx
,
18380 GEN_INT (delta
& ~4095)));
18385 addr
= stack_pointer_rtx
;
18387 addr
= plus_constant (addr
, delta
);
18389 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
18395 thumb_set_return_address (rtx source
, rtx scratch
)
18397 arm_stack_offsets
*offsets
;
18398 HOST_WIDE_INT delta
;
18399 HOST_WIDE_INT limit
;
18402 unsigned long mask
;
18406 offsets
= arm_get_frame_offsets ();
18407 mask
= offsets
->saved_regs_mask
;
18408 if (mask
& (1 << LR_REGNUM
))
18411 /* Find the saved regs. */
18412 if (frame_pointer_needed
)
18414 delta
= offsets
->soft_frame
- offsets
->saved_args
;
18415 reg
= THUMB_HARD_FRAME_POINTER_REGNUM
;
18421 delta
= offsets
->outgoing_args
- offsets
->saved_args
;
18424 /* Allow for the stack frame. */
18425 if (TARGET_THUMB1
&& TARGET_BACKTRACE
)
18427 /* The link register is always the first saved register. */
18430 /* Construct the address. */
18431 addr
= gen_rtx_REG (SImode
, reg
);
18434 emit_insn (gen_movsi (scratch
, GEN_INT (delta
)));
18435 emit_insn (gen_addsi3 (scratch
, scratch
, stack_pointer_rtx
));
18439 addr
= plus_constant (addr
, delta
);
18441 emit_move_insn (gen_frame_mem (Pmode
, addr
), source
);
18444 emit_move_insn (gen_rtx_REG (Pmode
, LR_REGNUM
), source
);
18447 /* Implements target hook vector_mode_supported_p. */
18449 arm_vector_mode_supported_p (enum machine_mode mode
)
18451 /* Neon also supports V2SImode, etc. listed in the clause below. */
18452 if (TARGET_NEON
&& (mode
== V2SFmode
|| mode
== V4SImode
|| mode
== V8HImode
18453 || mode
== V16QImode
|| mode
== V4SFmode
|| mode
== V2DImode
))
18456 if ((mode
== V2SImode
)
18457 || (mode
== V4HImode
)
18458 || (mode
== V8QImode
))
18464 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18465 ARM insns and therefore guarantee that the shift count is modulo 256.
18466 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18467 guarantee no particular behavior for out-of-range counts. */
18469 static unsigned HOST_WIDE_INT
18470 arm_shift_truncation_mask (enum machine_mode mode
)
18472 return mode
== SImode
? 255 : 0;
18476 /* Map internal gcc register numbers to DWARF2 register numbers. */
18479 arm_dbx_register_number (unsigned int regno
)
18484 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18485 compatibility. The EABI defines them as registers 96-103. */
18486 if (IS_FPA_REGNUM (regno
))
18487 return (TARGET_AAPCS_BASED
? 96 : 16) + regno
- FIRST_FPA_REGNUM
;
18489 /* FIXME: VFPv3 register numbering. */
18490 if (IS_VFP_REGNUM (regno
))
18491 return 64 + regno
- FIRST_VFP_REGNUM
;
18493 if (IS_IWMMXT_GR_REGNUM (regno
))
18494 return 104 + regno
- FIRST_IWMMXT_GR_REGNUM
;
18496 if (IS_IWMMXT_REGNUM (regno
))
18497 return 112 + regno
- FIRST_IWMMXT_REGNUM
;
18499 gcc_unreachable ();
18503 #ifdef TARGET_UNWIND_INFO
18504 /* Emit unwind directives for a store-multiple instruction or stack pointer
18505 push during alignment.
18506 These should only ever be generated by the function prologue code, so
18507 expect them to have a particular form. */
18510 arm_unwind_emit_sequence (FILE * asm_out_file
, rtx p
)
18513 HOST_WIDE_INT offset
;
18514 HOST_WIDE_INT nregs
;
18520 e
= XVECEXP (p
, 0, 0);
18521 if (GET_CODE (e
) != SET
)
18524 /* First insn will adjust the stack pointer. */
18525 if (GET_CODE (e
) != SET
18526 || GET_CODE (XEXP (e
, 0)) != REG
18527 || REGNO (XEXP (e
, 0)) != SP_REGNUM
18528 || GET_CODE (XEXP (e
, 1)) != PLUS
)
18531 offset
= -INTVAL (XEXP (XEXP (e
, 1), 1));
18532 nregs
= XVECLEN (p
, 0) - 1;
18534 reg
= REGNO (XEXP (XVECEXP (p
, 0, 1), 1));
18537 /* The function prologue may also push pc, but not annotate it as it is
18538 never restored. We turn this into a stack pointer adjustment. */
18539 if (nregs
* 4 == offset
- 4)
18541 fprintf (asm_out_file
, "\t.pad #4\n");
18545 fprintf (asm_out_file
, "\t.save {");
18547 else if (IS_VFP_REGNUM (reg
))
18550 fprintf (asm_out_file
, "\t.vsave {");
18552 else if (reg
>= FIRST_FPA_REGNUM
&& reg
<= LAST_FPA_REGNUM
)
18554 /* FPA registers are done differently. */
18555 asm_fprintf (asm_out_file
, "\t.save %r, %wd\n", reg
, nregs
);
18559 /* Unknown register type. */
18562 /* If the stack increment doesn't match the size of the saved registers,
18563 something has gone horribly wrong. */
18564 if (offset
!= nregs
* reg_size
)
18569 /* The remaining insns will describe the stores. */
18570 for (i
= 1; i
<= nregs
; i
++)
18572 /* Expect (set (mem <addr>) (reg)).
18573 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18574 e
= XVECEXP (p
, 0, i
);
18575 if (GET_CODE (e
) != SET
18576 || GET_CODE (XEXP (e
, 0)) != MEM
18577 || GET_CODE (XEXP (e
, 1)) != REG
)
18580 reg
= REGNO (XEXP (e
, 1));
18585 fprintf (asm_out_file
, ", ");
18586 /* We can't use %r for vfp because we need to use the
18587 double precision register names. */
18588 if (IS_VFP_REGNUM (reg
))
18589 asm_fprintf (asm_out_file
, "d%d", (reg
- FIRST_VFP_REGNUM
) / 2);
18591 asm_fprintf (asm_out_file
, "%r", reg
);
18593 #ifdef ENABLE_CHECKING
18594 /* Check that the addresses are consecutive. */
18595 e
= XEXP (XEXP (e
, 0), 0);
18596 if (GET_CODE (e
) == PLUS
)
18598 offset
+= reg_size
;
18599 if (GET_CODE (XEXP (e
, 0)) != REG
18600 || REGNO (XEXP (e
, 0)) != SP_REGNUM
18601 || GET_CODE (XEXP (e
, 1)) != CONST_INT
18602 || offset
!= INTVAL (XEXP (e
, 1)))
18606 || GET_CODE (e
) != REG
18607 || REGNO (e
) != SP_REGNUM
)
18611 fprintf (asm_out_file
, "}\n");
18614 /* Emit unwind directives for a SET. */
18617 arm_unwind_emit_set (FILE * asm_out_file
, rtx p
)
18625 switch (GET_CODE (e0
))
18628 /* Pushing a single register. */
18629 if (GET_CODE (XEXP (e0
, 0)) != PRE_DEC
18630 || GET_CODE (XEXP (XEXP (e0
, 0), 0)) != REG
18631 || REGNO (XEXP (XEXP (e0
, 0), 0)) != SP_REGNUM
)
18634 asm_fprintf (asm_out_file
, "\t.save ");
18635 if (IS_VFP_REGNUM (REGNO (e1
)))
18636 asm_fprintf(asm_out_file
, "{d%d}\n",
18637 (REGNO (e1
) - FIRST_VFP_REGNUM
) / 2);
18639 asm_fprintf(asm_out_file
, "{%r}\n", REGNO (e1
));
18643 if (REGNO (e0
) == SP_REGNUM
)
18645 /* A stack increment. */
18646 if (GET_CODE (e1
) != PLUS
18647 || GET_CODE (XEXP (e1
, 0)) != REG
18648 || REGNO (XEXP (e1
, 0)) != SP_REGNUM
18649 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
18652 asm_fprintf (asm_out_file
, "\t.pad #%wd\n",
18653 -INTVAL (XEXP (e1
, 1)));
18655 else if (REGNO (e0
) == HARD_FRAME_POINTER_REGNUM
)
18657 HOST_WIDE_INT offset
;
18659 if (GET_CODE (e1
) == PLUS
)
18661 if (GET_CODE (XEXP (e1
, 0)) != REG
18662 || GET_CODE (XEXP (e1
, 1)) != CONST_INT
)
18664 reg
= REGNO (XEXP (e1
, 0));
18665 offset
= INTVAL (XEXP (e1
, 1));
18666 asm_fprintf (asm_out_file
, "\t.setfp %r, %r, #%wd\n",
18667 HARD_FRAME_POINTER_REGNUM
, reg
,
18668 INTVAL (XEXP (e1
, 1)));
18670 else if (GET_CODE (e1
) == REG
)
18673 asm_fprintf (asm_out_file
, "\t.setfp %r, %r\n",
18674 HARD_FRAME_POINTER_REGNUM
, reg
);
18679 else if (GET_CODE (e1
) == REG
&& REGNO (e1
) == SP_REGNUM
)
18681 /* Move from sp to reg. */
18682 asm_fprintf (asm_out_file
, "\t.movsp %r\n", REGNO (e0
));
18684 else if (GET_CODE (e1
) == PLUS
18685 && GET_CODE (XEXP (e1
, 0)) == REG
18686 && REGNO (XEXP (e1
, 0)) == SP_REGNUM
18687 && GET_CODE (XEXP (e1
, 1)) == CONST_INT
)
18689 /* Set reg to offset from sp. */
18690 asm_fprintf (asm_out_file
, "\t.movsp %r, #%d\n",
18691 REGNO (e0
), (int)INTVAL(XEXP (e1
, 1)));
18693 else if (GET_CODE (e1
) == UNSPEC
&& XINT (e1
, 1) == UNSPEC_STACK_ALIGN
)
18695 /* Stack pointer save before alignment. */
18697 asm_fprintf (asm_out_file
, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18710 /* Emit unwind directives for the given insn. */
18713 arm_unwind_emit (FILE * asm_out_file
, rtx insn
)
18717 if (!ARM_EABI_UNWIND_TABLES
)
18720 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
18721 && (TREE_NOTHROW (current_function_decl
)
18722 || crtl
->all_throwers_are_sibcalls
))
18725 if (GET_CODE (insn
) == NOTE
|| !RTX_FRAME_RELATED_P (insn
))
18728 pat
= find_reg_note (insn
, REG_FRAME_RELATED_EXPR
, NULL_RTX
);
18730 pat
= XEXP (pat
, 0);
18732 pat
= PATTERN (insn
);
18734 switch (GET_CODE (pat
))
18737 arm_unwind_emit_set (asm_out_file
, pat
);
18741 /* Store multiple. */
18742 arm_unwind_emit_sequence (asm_out_file
, pat
);
18751 /* Output a reference from a function exception table to the type_info
18752 object X. The EABI specifies that the symbol should be relocated by
18753 an R_ARM_TARGET2 relocation. */
18756 arm_output_ttype (rtx x
)
18758 fputs ("\t.word\t", asm_out_file
);
18759 output_addr_const (asm_out_file
, x
);
18760 /* Use special relocations for symbol references. */
18761 if (GET_CODE (x
) != CONST_INT
)
18762 fputs ("(TARGET2)", asm_out_file
);
18763 fputc ('\n', asm_out_file
);
18767 #endif /* TARGET_UNWIND_INFO */
18770 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18771 stack alignment. */
18774 arm_dwarf_handle_frame_unspec (const char *label
, rtx pattern
, int index
)
18776 rtx unspec
= SET_SRC (pattern
);
18777 gcc_assert (GET_CODE (unspec
) == UNSPEC
);
18781 case UNSPEC_STACK_ALIGN
:
18782 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18783 put anything on the stack, so hopefully it won't matter.
18784 CFA = SP will be correct after alignment. */
18785 dwarf2out_reg_save_reg (label
, stack_pointer_rtx
,
18786 SET_DEST (pattern
));
18789 gcc_unreachable ();
18794 /* Output unwind directives for the start/end of a function. */
18797 arm_output_fn_unwind (FILE * f
, bool prologue
)
18799 if (!ARM_EABI_UNWIND_TABLES
)
18803 fputs ("\t.fnstart\n", f
);
18806 /* If this function will never be unwound, then mark it as such.
18807 The came condition is used in arm_unwind_emit to suppress
18808 the frame annotations. */
18809 if (!(flag_unwind_tables
|| crtl
->uses_eh_lsda
)
18810 && (TREE_NOTHROW (current_function_decl
)
18811 || crtl
->all_throwers_are_sibcalls
))
18812 fputs("\t.cantunwind\n", f
);
18814 fputs ("\t.fnend\n", f
);
18819 arm_emit_tls_decoration (FILE *fp
, rtx x
)
18821 enum tls_reloc reloc
;
18824 val
= XVECEXP (x
, 0, 0);
18825 reloc
= INTVAL (XVECEXP (x
, 0, 1));
18827 output_addr_const (fp
, val
);
18832 fputs ("(tlsgd)", fp
);
18835 fputs ("(tlsldm)", fp
);
18838 fputs ("(tlsldo)", fp
);
18841 fputs ("(gottpoff)", fp
);
18844 fputs ("(tpoff)", fp
);
18847 gcc_unreachable ();
18855 fputs (" + (. - ", fp
);
18856 output_addr_const (fp
, XVECEXP (x
, 0, 2));
18858 output_addr_const (fp
, XVECEXP (x
, 0, 3));
18868 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18871 arm_output_dwarf_dtprel (FILE *file
, int size
, rtx x
)
18873 gcc_assert (size
== 4);
18874 fputs ("\t.word\t", file
);
18875 output_addr_const (file
, x
);
18876 fputs ("(tlsldo)", file
);
18880 arm_output_addr_const_extra (FILE *fp
, rtx x
)
18882 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
18883 return arm_emit_tls_decoration (fp
, x
);
18884 else if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_PIC_LABEL
)
18887 int labelno
= INTVAL (XVECEXP (x
, 0, 0));
18889 ASM_GENERATE_INTERNAL_LABEL (label
, "LPIC", labelno
);
18890 assemble_name_raw (fp
, label
);
18894 else if (GET_CODE (x
) == CONST_VECTOR
)
18895 return arm_emit_vector_const (fp
, x
);
18900 /* Output assembly for a shift instruction.
18901 SET_FLAGS determines how the instruction modifies the condition codes.
18902 0 - Do not set condition codes.
18903 1 - Set condition codes.
18904 2 - Use smallest instruction. */
18906 arm_output_shift(rtx
* operands
, int set_flags
)
18909 static const char flag_chars
[3] = {'?', '.', '!'};
18914 c
= flag_chars
[set_flags
];
18915 if (TARGET_UNIFIED_ASM
)
18917 shift
= shift_op(operands
[3], &val
);
18921 operands
[2] = GEN_INT(val
);
18922 sprintf (pattern
, "%s%%%c\t%%0, %%1, %%2", shift
, c
);
18925 sprintf (pattern
, "mov%%%c\t%%0, %%1", c
);
18928 sprintf (pattern
, "mov%%%c\t%%0, %%1%%S3", c
);
18929 output_asm_insn (pattern
, operands
);
18933 /* Output a Thumb-2 casesi instruction. */
18935 thumb2_output_casesi (rtx
*operands
)
18937 rtx diff_vec
= PATTERN (next_real_insn (operands
[2]));
18939 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
18941 output_asm_insn ("cmp\t%0, %1", operands
);
18942 output_asm_insn ("bhi\t%l3", operands
);
18943 switch (GET_MODE(diff_vec
))
18946 return "tbb\t[%|pc, %0]";
18948 return "tbh\t[%|pc, %0, lsl #1]";
18952 output_asm_insn ("adr\t%4, %l2", operands
);
18953 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands
);
18954 output_asm_insn ("add\t%4, %4, %5", operands
);
18959 output_asm_insn ("adr\t%4, %l2", operands
);
18960 return "ldr\t%|pc, [%4, %0, lsl #2]";
18963 gcc_unreachable ();
18967 /* Most ARM cores are single issue, but some newer ones can dual issue.
18968 The scheduler descriptions rely on this being correct. */
18970 arm_issue_rate (void)
18983 /* A table and a function to perform ARM-specific name mangling for
18984 NEON vector types in order to conform to the AAPCS (see "Procedure
18985 Call Standard for the ARM Architecture", Appendix A). To qualify
18986 for emission with the mangled names defined in that document, a
18987 vector type must not only be of the correct mode but also be
18988 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18991 enum machine_mode mode
;
18992 const char *element_type_name
;
18993 const char *aapcs_name
;
18994 } arm_mangle_map_entry
;
18996 static arm_mangle_map_entry arm_mangle_map
[] = {
18997 /* 64-bit containerized types. */
18998 { V8QImode
, "__builtin_neon_qi", "15__simd64_int8_t" },
18999 { V8QImode
, "__builtin_neon_uqi", "16__simd64_uint8_t" },
19000 { V4HImode
, "__builtin_neon_hi", "16__simd64_int16_t" },
19001 { V4HImode
, "__builtin_neon_uhi", "17__simd64_uint16_t" },
19002 { V2SImode
, "__builtin_neon_si", "16__simd64_int32_t" },
19003 { V2SImode
, "__builtin_neon_usi", "17__simd64_uint32_t" },
19004 { V2SFmode
, "__builtin_neon_sf", "18__simd64_float32_t" },
19005 { V8QImode
, "__builtin_neon_poly8", "16__simd64_poly8_t" },
19006 { V4HImode
, "__builtin_neon_poly16", "17__simd64_poly16_t" },
19007 /* 128-bit containerized types. */
19008 { V16QImode
, "__builtin_neon_qi", "16__simd128_int8_t" },
19009 { V16QImode
, "__builtin_neon_uqi", "17__simd128_uint8_t" },
19010 { V8HImode
, "__builtin_neon_hi", "17__simd128_int16_t" },
19011 { V8HImode
, "__builtin_neon_uhi", "18__simd128_uint16_t" },
19012 { V4SImode
, "__builtin_neon_si", "17__simd128_int32_t" },
19013 { V4SImode
, "__builtin_neon_usi", "18__simd128_uint32_t" },
19014 { V4SFmode
, "__builtin_neon_sf", "19__simd128_float32_t" },
19015 { V16QImode
, "__builtin_neon_poly8", "17__simd128_poly8_t" },
19016 { V8HImode
, "__builtin_neon_poly16", "18__simd128_poly16_t" },
19017 { VOIDmode
, NULL
, NULL
}
19021 arm_mangle_type (const_tree type
)
19023 arm_mangle_map_entry
*pos
= arm_mangle_map
;
19025 if (TREE_CODE (type
) != VECTOR_TYPE
)
19028 /* Check the mode of the vector type, and the name of the vector
19029 element type, against the table. */
19030 while (pos
->mode
!= VOIDmode
)
19032 tree elt_type
= TREE_TYPE (type
);
19034 if (pos
->mode
== TYPE_MODE (type
)
19035 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
19036 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
19037 pos
->element_type_name
))
19038 return pos
->aapcs_name
;
19043 /* Use the default mangling for unrecognized (possibly user-defined)
19048 #include "gt-arm.h"