1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "real.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "recog.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "c-pragma.h"
48 #include "integrate.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "debug.h"
53 #include "langhooks.h"
54 #include "df.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 void (*arm_lang_output_object_attributes_hook)(void);
64 /* Forward function declarations. */
65 static arm_stack_offsets *arm_get_frame_offsets (void);
66 static void arm_add_gc_roots (void);
67 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
68 HOST_WIDE_INT, rtx, rtx, int, int);
69 static unsigned bit_count (unsigned long);
70 static int arm_address_register_rtx_p (rtx, int);
71 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
72 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
73 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
74 inline static int thumb1_index_register_rtx_p (rtx, int);
75 static int thumb_far_jump_used_p (void);
76 static bool thumb_force_lr_save (void);
77 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
78 static rtx emit_sfm (int, int);
79 static unsigned arm_size_return_regs (void);
80 static bool arm_assemble_integer (rtx, unsigned int, int);
81 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
82 static arm_cc get_arm_condition_code (rtx);
83 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
84 static rtx is_jump_table (rtx);
85 static const char *output_multi_immediate (rtx *, const char *, const char *,
86 int, HOST_WIDE_INT);
87 static const char *shift_op (rtx, HOST_WIDE_INT *);
88 static struct machine_function *arm_init_machine_status (void);
89 static void thumb_exit (FILE *, int);
90 static rtx is_jump_table (rtx);
91 static HOST_WIDE_INT get_jump_table_size (rtx);
92 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
93 static Mnode *add_minipool_forward_ref (Mfix *);
94 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_backward_ref (Mfix *);
96 static void assign_minipool_offsets (Mfix *);
97 static void arm_print_value (FILE *, rtx);
98 static void dump_minipool (rtx);
99 static int arm_barrier_cost (rtx);
100 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
101 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
102 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
103 rtx);
104 static void arm_reorg (void);
105 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree);
109 static unsigned long arm_compute_func_type (void);
110 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
111 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
114 #endif
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static int arm_comp_type_attributes (const_tree, const_tree);
119 static void arm_set_default_type_attributes (tree);
120 static int arm_adjust_cost (rtx, rtx, rtx, int);
121 static int count_insns_for_constant (HOST_WIDE_INT, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree, tree);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
126 tree);
127 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
128 static bool arm_size_rtx_costs (rtx, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx, int, int, int *);
133 static int arm_address_cost (rtx);
134 static bool arm_memory_load_p (rtx);
135 static bool arm_cirrus_insn_p (rtx);
136 static void cirrus_reorg (rtx);
137 static void arm_init_builtins (void);
138 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx safe_vector_operand (rtx, enum machine_mode);
141 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
142 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
143 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
144 static void emit_constant_insn (rtx cond, rtx pattern);
145 static rtx emit_set_insn (rtx, rtx);
146 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
147 tree, bool);
149 #ifdef OBJECT_FORMAT_ELF
150 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
151 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
152 #endif
153 #ifndef ARM_PE
154 static void arm_encode_section_info (tree, rtx, int);
155 #endif
157 static void arm_file_end (void);
158 static void arm_file_start (void);
160 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
161 tree, int *, int);
162 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
163 enum machine_mode, const_tree, bool);
164 static bool arm_promote_prototypes (const_tree);
165 static bool arm_default_short_enums (void);
166 static bool arm_align_anon_bitfield (void);
167 static bool arm_return_in_msb (const_tree);
168 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
169 #ifdef TARGET_UNWIND_INFO
170 static void arm_unwind_emit (FILE *, rtx);
171 static bool arm_output_ttype (rtx);
172 #endif
173 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
175 static tree arm_cxx_guard_type (void);
176 static bool arm_cxx_guard_mask_bit (void);
177 static tree arm_get_cookie_size (tree);
178 static bool arm_cookie_has_size (void);
179 static bool arm_cxx_cdtor_returns_this (void);
180 static bool arm_cxx_key_method_may_be_inline (void);
181 static void arm_cxx_determine_class_data_visibility (tree);
182 static bool arm_cxx_class_data_always_comdat (void);
183 static bool arm_cxx_use_aeabi_atexit (void);
184 static void arm_init_libfuncs (void);
185 static bool arm_handle_option (size_t, const char *, int);
186 static void arm_target_help (void);
187 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
188 static bool arm_cannot_copy_insn_p (rtx);
189 static bool arm_tls_symbol_p (rtx x);
190 static int arm_issue_rate (void);
191 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
192 static bool arm_allocate_stack_slots_for_args (void);
195 /* Initialize the GCC target structure. */
196 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
197 #undef TARGET_MERGE_DECL_ATTRIBUTES
198 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
199 #endif
201 #undef TARGET_ATTRIBUTE_TABLE
202 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
204 #undef TARGET_ASM_FILE_START
205 #define TARGET_ASM_FILE_START arm_file_start
206 #undef TARGET_ASM_FILE_END
207 #define TARGET_ASM_FILE_END arm_file_end
209 #undef TARGET_ASM_ALIGNED_SI_OP
210 #define TARGET_ASM_ALIGNED_SI_OP NULL
211 #undef TARGET_ASM_INTEGER
212 #define TARGET_ASM_INTEGER arm_assemble_integer
214 #undef TARGET_ASM_FUNCTION_PROLOGUE
215 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
217 #undef TARGET_ASM_FUNCTION_EPILOGUE
218 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
220 #undef TARGET_DEFAULT_TARGET_FLAGS
221 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
222 #undef TARGET_HANDLE_OPTION
223 #define TARGET_HANDLE_OPTION arm_handle_option
224 #undef TARGET_HELP
225 #define TARGET_HELP arm_target_help
227 #undef TARGET_COMP_TYPE_ATTRIBUTES
228 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
230 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
231 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
233 #undef TARGET_SCHED_ADJUST_COST
234 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
236 #undef TARGET_ENCODE_SECTION_INFO
237 #ifdef ARM_PE
238 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
239 #else
240 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
241 #endif
243 #undef TARGET_STRIP_NAME_ENCODING
244 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
246 #undef TARGET_ASM_INTERNAL_LABEL
247 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
249 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
250 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
252 #undef TARGET_ASM_OUTPUT_MI_THUNK
253 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
254 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
255 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
257 /* This will be overridden in arm_override_options. */
258 #undef TARGET_RTX_COSTS
259 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
260 #undef TARGET_ADDRESS_COST
261 #define TARGET_ADDRESS_COST arm_address_cost
263 #undef TARGET_SHIFT_TRUNCATION_MASK
264 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
265 #undef TARGET_VECTOR_MODE_SUPPORTED_P
266 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
268 #undef TARGET_MACHINE_DEPENDENT_REORG
269 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
271 #undef TARGET_INIT_BUILTINS
272 #define TARGET_INIT_BUILTINS arm_init_builtins
273 #undef TARGET_EXPAND_BUILTIN
274 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
276 #undef TARGET_INIT_LIBFUNCS
277 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
279 #undef TARGET_PROMOTE_FUNCTION_ARGS
280 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
281 #undef TARGET_PROMOTE_FUNCTION_RETURN
282 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
283 #undef TARGET_PROMOTE_PROTOTYPES
284 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
285 #undef TARGET_PASS_BY_REFERENCE
286 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
287 #undef TARGET_ARG_PARTIAL_BYTES
288 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
290 #undef TARGET_SETUP_INCOMING_VARARGS
291 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
293 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
294 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
296 #undef TARGET_DEFAULT_SHORT_ENUMS
297 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
299 #undef TARGET_ALIGN_ANON_BITFIELD
300 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
302 #undef TARGET_NARROW_VOLATILE_BITFIELD
303 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
305 #undef TARGET_CXX_GUARD_TYPE
306 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
308 #undef TARGET_CXX_GUARD_MASK_BIT
309 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
311 #undef TARGET_CXX_GET_COOKIE_SIZE
312 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
314 #undef TARGET_CXX_COOKIE_HAS_SIZE
315 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
317 #undef TARGET_CXX_CDTOR_RETURNS_THIS
318 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
320 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
321 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
323 #undef TARGET_CXX_USE_AEABI_ATEXIT
324 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
326 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
327 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
328 arm_cxx_determine_class_data_visibility
330 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
331 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
333 #undef TARGET_RETURN_IN_MSB
334 #define TARGET_RETURN_IN_MSB arm_return_in_msb
336 #undef TARGET_MUST_PASS_IN_STACK
337 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
339 #ifdef TARGET_UNWIND_INFO
340 #undef TARGET_UNWIND_EMIT
341 #define TARGET_UNWIND_EMIT arm_unwind_emit
343 /* EABI unwinding tables use a different format for the typeinfo tables. */
344 #undef TARGET_ASM_TTYPE
345 #define TARGET_ASM_TTYPE arm_output_ttype
347 #undef TARGET_ARM_EABI_UNWINDER
348 #define TARGET_ARM_EABI_UNWINDER true
349 #endif /* TARGET_UNWIND_INFO */
351 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
352 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
354 #undef TARGET_CANNOT_COPY_INSN_P
355 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
357 #ifdef HAVE_AS_TLS
358 #undef TARGET_HAVE_TLS
359 #define TARGET_HAVE_TLS true
360 #endif
362 #undef TARGET_CANNOT_FORCE_CONST_MEM
363 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
365 #undef TARGET_SCHED_ISSUE_RATE
366 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
368 #undef TARGET_MANGLE_TYPE
369 #define TARGET_MANGLE_TYPE arm_mangle_type
371 #ifdef HAVE_AS_TLS
372 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
373 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
374 #endif
376 struct gcc_target targetm = TARGET_INITIALIZER;
378 /* Obstack for minipool constant handling. */
379 static struct obstack minipool_obstack;
380 static char * minipool_startobj;
382 /* The maximum number of insns skipped which
383 will be conditionalised if possible. */
384 static int max_insns_skipped = 5;
386 extern FILE * asm_out_file;
388 /* True if we are currently building a constant table. */
389 int making_const_table;
391 /* Define the information needed to generate branch insns. This is
392 stored from the compare operation. */
393 rtx arm_compare_op0, arm_compare_op1;
395 /* The processor for which instructions should be scheduled. */
396 enum processor_type arm_tune = arm_none;
398 /* The default processor used if not overridden by commandline. */
399 static enum processor_type arm_default_cpu = arm_none;
401 /* Which floating point model to use. */
402 enum arm_fp_model arm_fp_model;
404 /* Which floating point hardware is available. */
405 enum fputype arm_fpu_arch;
407 /* Which floating point hardware to schedule for. */
408 enum fputype arm_fpu_tune;
410 /* Whether to use floating point hardware. */
411 enum float_abi_type arm_float_abi;
413 /* Which ABI to use. */
414 enum arm_abi_type arm_abi;
416 /* Which thread pointer model to use. */
417 enum arm_tp_type target_thread_pointer = TP_AUTO;
419 /* Used to parse -mstructure_size_boundary command line option. */
420 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
422 /* Used for Thumb call_via trampolines. */
423 rtx thumb_call_via_label[14];
424 static int thumb_call_reg_needed;
426 /* Bit values used to identify processor capabilities. */
427 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
428 #define FL_ARCH3M (1 << 1) /* Extended multiply */
429 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
430 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
431 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
432 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
433 #define FL_THUMB (1 << 6) /* Thumb aware */
434 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
435 #define FL_STRONG (1 << 8) /* StrongARM */
436 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
437 #define FL_XSCALE (1 << 10) /* XScale */
438 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
439 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
440 media instructions. */
441 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
442 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
443 Note: ARM6 & 7 derivatives only. */
444 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
445 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
446 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
447 profile. */
448 #define FL_DIV (1 << 18) /* Hardware divide. */
449 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
450 #define FL_NEON (1 << 20) /* Neon instructions. */
452 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
454 #define FL_FOR_ARCH2 FL_NOTM
455 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
456 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
457 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
458 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
459 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
460 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
461 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
462 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
463 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
464 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
465 #define FL_FOR_ARCH6J FL_FOR_ARCH6
466 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
467 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
468 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
469 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
470 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
471 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
472 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
473 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
474 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
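/* As a worked example of how these FL_FOR_ARCH* macros accumulate (derived
   purely from the definitions above):

     FL_FOR_ARCH5TE = FL_FOR_ARCH5E | FL_THUMB
                    = FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4
                      | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   so an architecture selection of armv5te permits Thumb and the v5 DSP
   extensions plus everything from the earlier architectures, but none of
   the v6, Thumb-2 or VFP feature bits.  */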
476 /* The bits in this mask specify which
477 instructions we are allowed to generate. */
478 static unsigned long insn_flags = 0;
480 /* The bits in this mask specify which instruction scheduling options should
481 be used. */
482 static unsigned long tune_flags = 0;
484 /* The following are used in the arm.md file as equivalents to bits
485 in the above two flag variables. */
487 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
488 int arm_arch3m = 0;
490 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
491 int arm_arch4 = 0;
493 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
494 int arm_arch4t = 0;
496 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
497 int arm_arch5 = 0;
499 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
500 int arm_arch5e = 0;
502 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
503 int arm_arch6 = 0;
505 /* Nonzero if this chip supports the ARM 6K extensions. */
506 int arm_arch6k = 0;
508 /* Nonzero if instructions not present in the 'M' profile can be used. */
509 int arm_arch_notm = 0;
511 /* Nonzero if this chip can benefit from load scheduling. */
512 int arm_ld_sched = 0;
514 /* Nonzero if this chip is a StrongARM. */
515 int arm_tune_strongarm = 0;
517 /* Nonzero if this chip is a Cirrus variant. */
518 int arm_arch_cirrus = 0;
520 /* Nonzero if this chip supports Intel Wireless MMX technology. */
521 int arm_arch_iwmmxt = 0;
523 /* Nonzero if this chip is an XScale. */
524 int arm_arch_xscale = 0;
526 /* Nonzero if tuning for XScale */
527 int arm_tune_xscale = 0;
529 /* Nonzero if we want to tune for stores that access the write-buffer.
530 This typically means an ARM6 or ARM7 with MMU or MPU. */
531 int arm_tune_wbuf = 0;
533 /* Nonzero if generating Thumb instructions. */
534 int thumb_code = 0;
536 /* Nonzero if we should define __THUMB_INTERWORK__ in the
537 preprocessor.
538 XXX This is a bit of a hack, it's intended to help work around
539 problems in GLD which doesn't understand that armv5t code is
540 interworking clean. */
541 int arm_cpp_interwork = 0;
543 /* Nonzero if chip supports Thumb 2. */
544 int arm_arch_thumb2;
546 /* Nonzero if chip supports integer division instruction. */
547 int arm_arch_hwdiv;
549 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
550 must report the mode of the memory reference from PRINT_OPERAND to
551 PRINT_OPERAND_ADDRESS. */
552 enum machine_mode output_memory_reference_mode;
554 /* The register number to be used for the PIC offset register. */
555 unsigned arm_pic_register = INVALID_REGNUM;
557 /* Set to 1 when a return insn is output; this means that the epilogue
558 is not needed. */
559 int return_used_this_function;
561 /* Set to 1 after arm_reorg has started. Reset to start at the start of
562 the next function. */
563 static int after_arm_reorg = 0;
565 /* The maximum number of insns to be used when loading a constant. */
566 static int arm_constant_limit = 3;
568 /* For an explanation of these variables, see final_prescan_insn below. */
569 int arm_ccfsm_state;
570 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
571 enum arm_cond_code arm_current_cc;
572 rtx arm_target_insn;
573 int arm_target_label;
574 /* The number of conditionally executed insns, including the current insn. */
575 int arm_condexec_count = 0;
576 /* A bitmask specifying the patterns for the IT block.
577 Zero means do not output an IT block before this insn. */
578 int arm_condexec_mask = 0;
579 /* The number of bits used in arm_condexec_mask. */
580 int arm_condexec_masklen = 0;
582 /* The condition codes of the ARM, and the inverse function. */
583 static const char * const arm_condition_codes[] =
585 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
586 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
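/* A note on the ordering above: each even-indexed code is immediately
   followed by its logical inverse (eq/ne, cs/cc, mi/pl, vs/vc, hi/ls,
   ge/lt, gt/le, al/nv), so the inverse of a condition is obtained by
   toggling the low bit of its index.  That is the "inverse function"
   the comment refers to; it is assumed here to be the
   ARM_INVERSE_CONDITION_CODE macro defined outside this file.  */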
589 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
590 #define streq(string1, string2) (strcmp (string1, string2) == 0)
592 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
593 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
594 | (1 << PIC_OFFSET_TABLE_REGNUM)))
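/* Reading the macro above: start from the eight low registers r0-r7
   (0xff) and mask off the Thumb hard frame pointer, the stack pointer,
   the program counter and the PIC base register; what remains are the
   low registers available as scratch "work" registers for Thumb-2 code
   (the macro's uses appear later in the file).  The specific register
   numbers come from arm.h, not from this excerpt.  */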
596 /* Initialization code. */
598 struct processors
600 const char *const name;
601 enum processor_type core;
602 const char *arch;
603 const unsigned long flags;
604 bool (* rtx_costs) (rtx, int, int, int *);
607 /* Not all of these give usefully different compilation alternatives,
608 but there is no simple way of generalizing them. */
609 static const struct processors all_cores[] =
611 /* ARM Cores */
612 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
613 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
614 #include "arm-cores.def"
615 #undef ARM_CORE
616 {NULL, arm_none, NULL, 0, NULL}
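/* To illustrate the ARM_CORE expansion above (the entry shown is
   hypothetical; the real list lives in arm-cores.def), a line such as

     ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)

   would expand to

     {"arm926ej-s", arm_none, "5TEJ",
      FL_LDSCHED | FL_FOR_ARCH5TEJ, arm_9e_rtx_costs},

   i.e. the ARCH argument is both stringized for the arch field and
   pasted onto FL_FOR_ARCH to pull in that architecture's baseline
   feature bits, while the COSTS argument selects one of the
   arm_*_rtx_costs routines declared earlier in this file.  Note that
   the IDENT argument is not used in the expansion; the core field is
   always arm_none here.  */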
619 static const struct processors all_architectures[] =
621 /* ARM Architectures */
622 /* We don't specify rtx_costs here as it will be figured out
623 from the core. */
625 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
626 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
627 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
628 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
629 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
630 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
631 implementations that support it, so we will leave it out for now. */
632 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
633 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
634 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
635 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
636 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
637 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
638 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
639 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
640 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
641 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
642 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
643 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
644 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
645 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
646 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
647 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
648 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
649 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
650 {NULL, arm_none, NULL, 0 , NULL}
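/* A sketch of how this table is consumed by arm_override_options below:
   -march=armv5te matches the "armv5te" entry, so insn_flags becomes
   FL_CO_PROC | FL_FOR_ARCH5TE, target_arch_cpu is remembered as
   arm1026ejs for later tuning decisions, and the arch string "5TE" is
   fed through

     sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

   to produce the __ARM_ARCH_5TE__ predefine.  The choice of armv5te is
   only an example.  */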
653 struct arm_cpu_select
655 const char * string;
656 const char * name;
657 const struct processors * processors;
660 /* This is a magic structure. The 'string' field is magically filled in
661 with a pointer to the value specified by the user on the command line
662 assuming that the user has specified such a value. */
664 static struct arm_cpu_select arm_select[] =
666 /* string name processors */
667 { NULL, "-mcpu=", all_cores },
668 { NULL, "-march=", all_architectures },
669 { NULL, "-mtune=", all_cores }
672 /* Defines representing the indexes into the above table. */
673 #define ARM_OPT_SET_CPU 0
674 #define ARM_OPT_SET_ARCH 1
675 #define ARM_OPT_SET_TUNE 2
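/* A sketch of the option flow (see arm_handle_option and
   arm_override_options below): an option such as -mcpu=xscale is
   recorded by arm_handle_option as arm_select[ARM_OPT_SET_CPU].string =
   "xscale"; arm_override_options then walks arm_select and looks each
   recorded string up in the table attached to its slot (all_cores for
   -mcpu= and -mtune=, all_architectures for -march=) to derive
   insn_flags, arm_tune and the __ARM_ARCH_*__ predefine.  The cpu name
   "xscale" is only an example.  */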
677 /* The name of the preprocessor macro to define for this architecture. */
679 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
681 struct fpu_desc
683 const char * name;
684 enum fputype fpu;
688 /* Available values for -mfpu=. */
690 static const struct fpu_desc all_fpus[] =
692 {"fpa", FPUTYPE_FPA},
693 {"fpe2", FPUTYPE_FPA_EMU2},
694 {"fpe3", FPUTYPE_FPA_EMU2},
695 {"maverick", FPUTYPE_MAVERICK},
696 {"vfp", FPUTYPE_VFP},
697 {"vfp3", FPUTYPE_VFP3},
698 {"neon", FPUTYPE_NEON}
702 /* Floating point models used by the different hardware.
703 See fputype in arm.h. */
705 static const enum fputype fp_model_for_fpu[] =
707 /* No FP hardware. */
708 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
709 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
710 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
711 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
712 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
713 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
714 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
715 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
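/* This table is indexed by enum fputype (defined in arm.h, per the
   comment above), so for instance fp_model_for_fpu[FPUTYPE_VFP3] is
   ARM_FP_MODEL_VFP; arm_override_options below uses it to derive
   arm_fp_model once arm_fpu_arch has been chosen.  */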
719 struct float_abi
721 const char * name;
722 enum float_abi_type abi_type;
726 /* Available values for -mfloat-abi=. */
728 static const struct float_abi all_float_abis[] =
730 {"soft", ARM_FLOAT_ABI_SOFT},
731 {"softfp", ARM_FLOAT_ABI_SOFTFP},
732 {"hard", ARM_FLOAT_ABI_HARD}
736 struct abi_name
738 const char *name;
739 enum arm_abi_type abi_type;
743 /* Available values for -mabi=. */
745 static const struct abi_name arm_all_abis[] =
747 {"apcs-gnu", ARM_ABI_APCS},
748 {"atpcs", ARM_ABI_ATPCS},
749 {"aapcs", ARM_ABI_AAPCS},
750 {"iwmmxt", ARM_ABI_IWMMXT},
751 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
754 /* Supported TLS relocations. */
756 enum tls_reloc {
757 TLS_GD32,
758 TLS_LDM32,
759 TLS_LDO32,
760 TLS_IE32,
761 TLS_LE32
764 /* Emit an insn that's a simple single-set. Both the operands must be known
765 to be valid. */
766 inline static rtx
767 emit_set_insn (rtx x, rtx y)
769 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
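/* emit_set_insn is used throughout this file as shorthand; for example
   (illustrative only)

     emit_set_insn (stack_pointer_rtx,
                    plus_constant (stack_pointer_rtx, -8));

   emits the single insn (set (reg sp) (plus (reg sp) (const_int -8))),
   exactly as the longer emit_insn (gen_rtx_SET (VOIDmode, ...)) form
   would.  */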
772 /* Return the number of bits set in VALUE. */
773 static unsigned
774 bit_count (unsigned long value)
776 unsigned long count = 0;
778 while (value)
780 count++;
781 value &= value - 1; /* Clear the least-significant set bit. */
784 return count;
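/* The loop above is the classic "clear the lowest set bit" trick:
   value &= value - 1 removes exactly one set bit per iteration.  Worked
   example: bit_count (0x29), i.e. binary 101001, iterates
   0x29 -> 0x28 -> 0x20 -> 0 and returns 3.  */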
787 /* Set up library functions unique to ARM. */
789 static void
790 arm_init_libfuncs (void)
792 /* There are no special library functions unless we are using the
793 ARM BPABI. */
794 if (!TARGET_BPABI)
795 return;
797 /* The functions below are described in Section 4 of the "Run-Time
798 ABI for the ARM architecture", Version 1.0. */
800 /* Double-precision floating-point arithmetic. Table 2. */
801 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
802 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
803 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
804 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
805 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
807 /* Double-precision comparisons. Table 3. */
808 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
809 set_optab_libfunc (ne_optab, DFmode, NULL);
810 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
811 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
812 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
813 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
814 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
816 /* Single-precision floating-point arithmetic. Table 4. */
817 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
818 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
819 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
820 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
821 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
823 /* Single-precision comparisons. Table 5. */
824 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
825 set_optab_libfunc (ne_optab, SFmode, NULL);
826 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
827 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
828 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
829 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
830 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
832 /* Floating-point to integer conversions. Table 6. */
833 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
834 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
835 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
836 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
837 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
838 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
839 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
840 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
842 /* Conversions between floating types. Table 7. */
843 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
844 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
846 /* Integer to floating-point conversions. Table 8. */
847 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
848 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
849 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
850 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
851 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
852 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
853 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
854 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
856 /* Long long. Table 9. */
857 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
858 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
859 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
860 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
861 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
862 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
863 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
864 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
866 /* Integer (32/32->32) division. \S 4.3.1. */
867 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
868 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
870 /* The divmod functions are designed so that they can be used for
871 plain division, even though they return both the quotient and the
872 remainder. The quotient is returned in the usual location (i.e.,
873 r0 for SImode, {r0, r1} for DImode), just as would be expected
874 for an ordinary division routine. Because the AAPCS calling
875 conventions specify that all of { r0, r1, r2, r3 } are
876 call-clobbered registers, there is no need to tell the compiler
877 explicitly that those registers are clobbered by these
878 routines. */
879 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
880 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
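/* So, for example, a 64-bit division a / b becomes a call to
   __aeabi_ldivmod and the compiler simply consumes the quotient in
   {r0, r1}; the remainder that the routine also returns (in {r2, r3},
   per the Run-Time ABI document cited above, not per anything in this
   file) is ignored.  */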
882 /* For SImode division the ABI provides div-without-mod routines,
883 which are faster. */
884 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
885 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
887 /* We don't have mod libcalls. Fortunately gcc knows how to use the
888 divmod libcalls instead. */
889 set_optab_libfunc (smod_optab, DImode, NULL);
890 set_optab_libfunc (umod_optab, DImode, NULL);
891 set_optab_libfunc (smod_optab, SImode, NULL);
892 set_optab_libfunc (umod_optab, SImode, NULL);
895 /* Implement TARGET_HANDLE_OPTION. */
897 static bool
898 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
900 switch (code)
902 case OPT_march_:
903 arm_select[1].string = arg;
904 return true;
906 case OPT_mcpu_:
907 arm_select[0].string = arg;
908 return true;
910 case OPT_mhard_float:
911 target_float_abi_name = "hard";
912 return true;
914 case OPT_msoft_float:
915 target_float_abi_name = "soft";
916 return true;
918 case OPT_mtune_:
919 arm_select[2].string = arg;
920 return true;
922 default:
923 return true;
927 static void
928 arm_target_help (void)
930 int i;
931 static int columns = 0;
932 int remaining;
934 /* If we have not done so already, obtain the desired maximum width of
935 the output. Note - this is a duplication of the code at the start of
936 gcc/opts.c:print_specific_help() - the two copies should probably be
937 replaced by a single function. */
938 if (columns == 0)
940 const char *p;
942 GET_ENVIRONMENT (p, "COLUMNS");
943 if (p != NULL)
945 int value = atoi (p);
947 if (value > 0)
948 columns = value;
951 if (columns == 0)
952 /* Use a reasonable default. */
953 columns = 80;
956 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
958 /* The - 2 is because we know that the last entry in the array is NULL. */
959 i = ARRAY_SIZE (all_cores) - 2;
960 gcc_assert (i > 0);
961 printf (" %s", all_cores[i].name);
962 remaining = columns - (strlen (all_cores[i].name) + 4);
963 gcc_assert (remaining >= 0);
965 while (i--)
967 int len = strlen (all_cores[i].name);
969 if (remaining > len + 2)
971 printf (", %s", all_cores[i].name);
972 remaining -= len + 2;
974 else
976 if (remaining > 0)
977 printf (",");
978 printf ("\n %s", all_cores[i].name);
979 remaining = columns - (len + 4);
983 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
985 i = ARRAY_SIZE (all_architectures) - 2;
986 gcc_assert (i > 0);
988 printf (" %s", all_architectures[i].name);
989 remaining = columns - (strlen (all_architectures[i].name) + 4);
990 gcc_assert (remaining >= 0);
992 while (i--)
994 int len = strlen (all_architectures[i].name);
996 if (remaining > len + 2)
998 printf (", %s", all_architectures[i].name);
999 remaining -= len + 2;
1001 else
1003 if (remaining > 0)
1004 printf (",");
1005 printf ("\n %s", all_architectures[i].name);
1006 remaining = columns - (len + 4);
1009 printf ("\n");
1013 /* Fix up any incompatible options that the user has specified.
1014 This has now turned into a maze. */
1015 void
1016 arm_override_options (void)
1018 unsigned i;
1019 enum processor_type target_arch_cpu = arm_none;
1021 /* Set up the flags based on the cpu/architecture selected by the user. */
1022 for (i = ARRAY_SIZE (arm_select); i--;)
1024 struct arm_cpu_select * ptr = arm_select + i;
1026 if (ptr->string != NULL && ptr->string[0] != '\0')
1028 const struct processors * sel;
1030 for (sel = ptr->processors; sel->name != NULL; sel++)
1031 if (streq (ptr->string, sel->name))
1033 /* Set the architecture define. */
1034 if (i != ARM_OPT_SET_TUNE)
1035 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1037 /* Determine the processor core for which we should
1038 tune code-generation. */
1039 if (/* -mcpu= is a sensible default. */
1040 i == ARM_OPT_SET_CPU
1041 /* -mtune= overrides -mcpu= and -march=. */
1042 || i == ARM_OPT_SET_TUNE)
1043 arm_tune = (enum processor_type) (sel - ptr->processors);
1045 /* Remember the CPU associated with this architecture.
1046 If no other option is used to set the CPU type,
1047 we'll use this to guess the most suitable tuning
1048 options. */
1049 if (i == ARM_OPT_SET_ARCH)
1050 target_arch_cpu = sel->core;
1052 if (i != ARM_OPT_SET_TUNE)
1054 /* If we have been given an architecture and a processor
1055 make sure that they are compatible. We only generate
1056 a warning though, and we prefer the CPU over the
1057 architecture. */
1058 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1059 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1060 ptr->string);
1062 insn_flags = sel->flags;
1065 break;
1068 if (sel->name == NULL)
1069 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1073 /* Guess the tuning options from the architecture if necessary. */
1074 if (arm_tune == arm_none)
1075 arm_tune = target_arch_cpu;
1077 /* If the user did not specify a processor, choose one for them. */
1078 if (insn_flags == 0)
1080 const struct processors * sel;
1081 unsigned int sought;
1082 enum processor_type cpu;
1084 cpu = TARGET_CPU_DEFAULT;
1085 if (cpu == arm_none)
1087 #ifdef SUBTARGET_CPU_DEFAULT
1088 /* Use the subtarget default CPU if none was specified by
1089 configure. */
1090 cpu = SUBTARGET_CPU_DEFAULT;
1091 #endif
1092 /* Default to ARM6. */
1093 if (cpu == arm_none)
1094 cpu = arm6;
1096 sel = &all_cores[cpu];
1098 insn_flags = sel->flags;
1100 /* Now check to see if the user has specified some command line
1101 switches that require certain abilities from the cpu. */
1102 sought = 0;
1104 if (TARGET_INTERWORK || TARGET_THUMB)
1106 sought |= (FL_THUMB | FL_MODE32);
1108 /* There are no ARM processors that support both APCS-26 and
1109 interworking. Therefore we force FL_MODE26 to be removed
1110 from insn_flags here (if it was set), so that the search
1111 below will always be able to find a compatible processor. */
1112 insn_flags &= ~FL_MODE26;
1115 if (sought != 0 && ((sought & insn_flags) != sought))
1117 /* Try to locate a CPU type that supports all of the abilities
1118 of the default CPU, plus the extra abilities requested by
1119 the user. */
1120 for (sel = all_cores; sel->name != NULL; sel++)
1121 if ((sel->flags & sought) == (sought | insn_flags))
1122 break;
1124 if (sel->name == NULL)
1126 unsigned current_bit_count = 0;
1127 const struct processors * best_fit = NULL;
1129 /* Ideally we would like to issue an error message here
1130 saying that it was not possible to find a CPU compatible
1131 with the default CPU, but which also supports the command
1132 line options specified by the programmer, and so they
1133 ought to use the -mcpu=<name> command line option to
1134 override the default CPU type.
1136 If we cannot find a cpu that has both the
1137 characteristics of the default cpu and the given
1138 command line options we scan the array again looking
1139 for a best match. */
1140 for (sel = all_cores; sel->name != NULL; sel++)
1141 if ((sel->flags & sought) == sought)
1143 unsigned count;
1145 count = bit_count (sel->flags & insn_flags);
1147 if (count >= current_bit_count)
1149 best_fit = sel;
1150 current_bit_count = count;
1154 gcc_assert (best_fit);
1155 sel = best_fit;
1158 insn_flags = sel->flags;
1160 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1161 arm_default_cpu = (enum processor_type) (sel - all_cores);
1162 if (arm_tune == arm_none)
1163 arm_tune = arm_default_cpu;
1166 /* The processor for which we should tune should now have been
1167 chosen. */
1168 gcc_assert (arm_tune != arm_none);
1170 tune_flags = all_cores[(int)arm_tune].flags;
1171 if (optimize_size)
1172 targetm.rtx_costs = arm_size_rtx_costs;
1173 else
1174 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1176 /* Make sure that the processor choice does not conflict with any of the
1177 other command line choices. */
1178 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1179 error ("target CPU does not support ARM mode");
1181 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1183 warning (0, "target CPU does not support interworking" );
1184 target_flags &= ~MASK_INTERWORK;
1187 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1189 warning (0, "target CPU does not support THUMB instructions");
1190 target_flags &= ~MASK_THUMB;
1193 if (TARGET_APCS_FRAME && TARGET_THUMB)
1195 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1196 target_flags &= ~MASK_APCS_FRAME;
1199 /* Callee super interworking implies thumb interworking. Adding
1200 this to the flags here simplifies the logic elsewhere. */
1201 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1202 target_flags |= MASK_INTERWORK;
1204 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1205 from here where no function is being compiled currently. */
1206 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1207 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1209 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1210 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1212 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1213 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1215 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1217 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1218 target_flags |= MASK_APCS_FRAME;
1221 if (TARGET_POKE_FUNCTION_NAME)
1222 target_flags |= MASK_APCS_FRAME;
1224 if (TARGET_APCS_REENT && flag_pic)
1225 error ("-fpic and -mapcs-reent are incompatible");
1227 if (TARGET_APCS_REENT)
1228 warning (0, "APCS reentrant code not supported. Ignored");
1230 /* If this target is normally configured to use APCS frames, warn if they
1231 are turned off and debugging is turned on. */
1232 if (TARGET_ARM
1233 && write_symbols != NO_DEBUG
1234 && !TARGET_APCS_FRAME
1235 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1236 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1238 if (TARGET_APCS_FLOAT)
1239 warning (0, "passing floating point arguments in fp regs not yet supported");
1241 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1242 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1243 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1244 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1245 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1246 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1247 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1248 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1249 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1250 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1251 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1252 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1254 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1255 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1256 thumb_code = (TARGET_ARM == 0);
1257 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1258 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1259 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1260 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1262 /* V5 code we generate is completely interworking capable, so we turn off
1263 TARGET_INTERWORK here to avoid many tests later on. */
1265 /* XXX However, we must pass the right pre-processor defines to CPP
1266 or GLD can get confused. This is a hack. */
1267 if (TARGET_INTERWORK)
1268 arm_cpp_interwork = 1;
1270 if (arm_arch5)
1271 target_flags &= ~MASK_INTERWORK;
1273 if (target_abi_name)
1275 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1277 if (streq (arm_all_abis[i].name, target_abi_name))
1279 arm_abi = arm_all_abis[i].abi_type;
1280 break;
1283 if (i == ARRAY_SIZE (arm_all_abis))
1284 error ("invalid ABI option: -mabi=%s", target_abi_name);
1286 else
1287 arm_abi = ARM_DEFAULT_ABI;
1289 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1290 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1292 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1293 error ("iwmmxt abi requires an iwmmxt capable cpu");
1295 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1296 if (target_fpu_name == NULL && target_fpe_name != NULL)
1298 if (streq (target_fpe_name, "2"))
1299 target_fpu_name = "fpe2";
1300 else if (streq (target_fpe_name, "3"))
1301 target_fpu_name = "fpe3";
1302 else
1303 error ("invalid floating point emulation option: -mfpe=%s",
1304 target_fpe_name);
1306 if (target_fpu_name != NULL)
1308 /* The user specified a FPU. */
1309 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1311 if (streq (all_fpus[i].name, target_fpu_name))
1313 arm_fpu_arch = all_fpus[i].fpu;
1314 arm_fpu_tune = arm_fpu_arch;
1315 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1316 break;
1319 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1320 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1322 else
1324 #ifdef FPUTYPE_DEFAULT
1325 /* Use the default if it is specified for this platform. */
1326 arm_fpu_arch = FPUTYPE_DEFAULT;
1327 arm_fpu_tune = FPUTYPE_DEFAULT;
1328 #else
1329 /* Pick one based on CPU type. */
1330 /* ??? Some targets assume FPA is the default.
1331 if ((insn_flags & FL_VFP) != 0)
1332 arm_fpu_arch = FPUTYPE_VFP;
1333 else
1334 */
1335 if (arm_arch_cirrus)
1336 arm_fpu_arch = FPUTYPE_MAVERICK;
1337 else
1338 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1339 #endif
1340 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1341 arm_fpu_tune = FPUTYPE_FPA;
1342 else
1343 arm_fpu_tune = arm_fpu_arch;
1344 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1345 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1348 if (target_float_abi_name != NULL)
1350 /* The user specified a FP ABI. */
1351 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1353 if (streq (all_float_abis[i].name, target_float_abi_name))
1355 arm_float_abi = all_float_abis[i].abi_type;
1356 break;
1359 if (i == ARRAY_SIZE (all_float_abis))
1360 error ("invalid floating point abi: -mfloat-abi=%s",
1361 target_float_abi_name);
1363 else
1364 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1366 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1367 sorry ("-mfloat-abi=hard and VFP");
1369 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1370 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1371 will ever exist. GCC makes no attempt to support this combination. */
1372 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1373 sorry ("iWMMXt and hardware floating point");
1375 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1376 if (TARGET_THUMB2 && TARGET_IWMMXT)
1377 sorry ("Thumb-2 iWMMXt");
1379 /* If soft-float is specified then don't use FPU. */
1380 if (TARGET_SOFT_FLOAT)
1381 arm_fpu_arch = FPUTYPE_NONE;
1383 /* For arm2/3 there is no need to do any scheduling if there is only
1384 a floating point emulator, or we are doing software floating-point. */
1385 if ((TARGET_SOFT_FLOAT
1386 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1387 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1388 && (tune_flags & FL_MODE32) == 0)
1389 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1391 if (target_thread_switch)
1393 if (strcmp (target_thread_switch, "soft") == 0)
1394 target_thread_pointer = TP_SOFT;
1395 else if (strcmp (target_thread_switch, "auto") == 0)
1396 target_thread_pointer = TP_AUTO;
1397 else if (strcmp (target_thread_switch, "cp15") == 0)
1398 target_thread_pointer = TP_CP15;
1399 else
1400 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1403 /* Use the cp15 method if it is available. */
1404 if (target_thread_pointer == TP_AUTO)
1406 if (arm_arch6k && !TARGET_THUMB)
1407 target_thread_pointer = TP_CP15;
1408 else
1409 target_thread_pointer = TP_SOFT;
1412 if (TARGET_HARD_TP && TARGET_THUMB1)
1413 error ("can not use -mtp=cp15 with 16-bit Thumb");
1415 /* Override the default structure alignment for AAPCS ABI. */
1416 if (TARGET_AAPCS_BASED)
1417 arm_structure_size_boundary = 8;
1419 if (structure_size_string != NULL)
1421 int size = strtol (structure_size_string, NULL, 0);
1423 if (size == 8 || size == 32
1424 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1425 arm_structure_size_boundary = size;
1426 else
1427 warning (0, "structure size boundary can only be set to %s",
1428 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1431 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1433 error ("RTP PIC is incompatible with Thumb");
1434 flag_pic = 0;
1437 /* If stack checking is disabled, we can use r10 as the PIC register,
1438 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1439 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1441 if (TARGET_VXWORKS_RTP)
1442 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1443 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1446 if (flag_pic && TARGET_VXWORKS_RTP)
1447 arm_pic_register = 9;
1449 if (arm_pic_register_string != NULL)
1451 int pic_register = decode_reg_name (arm_pic_register_string);
1453 if (!flag_pic)
1454 warning (0, "-mpic-register= is useless without -fpic");
1456 /* Prevent the user from choosing an obviously stupid PIC register. */
1457 else if (pic_register < 0 || call_used_regs[pic_register]
1458 || pic_register == HARD_FRAME_POINTER_REGNUM
1459 || pic_register == STACK_POINTER_REGNUM
1460 || pic_register >= PC_REGNUM
1461 || (TARGET_VXWORKS_RTP
1462 && (unsigned int) pic_register != arm_pic_register))
1463 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1464 else
1465 arm_pic_register = pic_register;
1468 /* ??? We might want scheduling for thumb2. */
1469 if (TARGET_THUMB && flag_schedule_insns)
1471 /* Don't warn since it's on by default in -O2. */
1472 flag_schedule_insns = 0;
1475 if (optimize_size)
1477 arm_constant_limit = 1;
1479 /* If optimizing for size, bump the number of instructions that we
1480 are prepared to conditionally execute (even on a StrongARM). */
1481 max_insns_skipped = 6;
1483 else
1485 /* For processors with load scheduling, it never costs more than
1486 2 cycles to load a constant, and the load scheduler may well
1487 reduce that to 1. */
1488 if (arm_ld_sched)
1489 arm_constant_limit = 1;
1491 /* On XScale the longer latency of a load makes it more difficult
1492 to achieve a good schedule, so it's faster to synthesize
1493 constants that can be done in two insns. */
1494 if (arm_tune_xscale)
1495 arm_constant_limit = 2;
1497 /* StrongARM has early execution of branches, so a sequence
1498 that is worth skipping is shorter. */
1499 if (arm_tune_strongarm)
1500 max_insns_skipped = 3;
1503 /* Register global variables with the garbage collector. */
1504 arm_add_gc_roots ();
1507 static void
1508 arm_add_gc_roots (void)
1510 gcc_obstack_init(&minipool_obstack);
1511 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1514 /* A table of known ARM exception types.
1515 For use with the interrupt function attribute. */
1517 typedef struct
1519 const char *const arg;
1520 const unsigned long return_value;
1522 isr_attribute_arg;
1524 static const isr_attribute_arg isr_attribute_args [] =
1526 { "IRQ", ARM_FT_ISR },
1527 { "irq", ARM_FT_ISR },
1528 { "FIQ", ARM_FT_FIQ },
1529 { "fiq", ARM_FT_FIQ },
1530 { "ABORT", ARM_FT_ISR },
1531 { "abort", ARM_FT_ISR },
1532 { "ABORT", ARM_FT_ISR },
1533 { "abort", ARM_FT_ISR },
1534 { "UNDEF", ARM_FT_EXCEPTION },
1535 { "undef", ARM_FT_EXCEPTION },
1536 { "SWI", ARM_FT_EXCEPTION },
1537 { "swi", ARM_FT_EXCEPTION },
1538 { NULL, ARM_FT_NORMAL }
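/* For example, a handler declared as

     void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   reaches arm_isr_value below with the string "FIQ" and is classified
   as ARM_FT_FIQ; an interrupt attribute with no argument defaults to
   ARM_FT_ISR.  The handler name is of course only illustrative.  */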
1541 /* Returns the (interrupt) function type of the current
1542 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1544 static unsigned long
1545 arm_isr_value (tree argument)
1547 const isr_attribute_arg * ptr;
1548 const char * arg;
1550 if (!arm_arch_notm)
1551 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1553 /* No argument - default to IRQ. */
1554 if (argument == NULL_TREE)
1555 return ARM_FT_ISR;
1557 /* Get the value of the argument. */
1558 if (TREE_VALUE (argument) == NULL_TREE
1559 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1560 return ARM_FT_UNKNOWN;
1562 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1564 /* Check it against the list of known arguments. */
1565 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1566 if (streq (arg, ptr->arg))
1567 return ptr->return_value;
1569 /* An unrecognized interrupt type. */
1570 return ARM_FT_UNKNOWN;
1573 /* Computes the type of the current function. */
1575 static unsigned long
1576 arm_compute_func_type (void)
1578 unsigned long type = ARM_FT_UNKNOWN;
1579 tree a;
1580 tree attr;
1582 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1584 /* Decide if the current function is volatile. Such functions
1585 never return, and many memory cycles can be saved by not storing
1586 register values that will never be needed again. This optimization
1587 was added to speed up context switching in a kernel application. */
1588 if (optimize > 0
1589 && (TREE_NOTHROW (current_function_decl)
1590 || !(flag_unwind_tables
1591 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1592 && TREE_THIS_VOLATILE (current_function_decl))
1593 type |= ARM_FT_VOLATILE;
1595 if (cfun->static_chain_decl != NULL)
1596 type |= ARM_FT_NESTED;
1598 attr = DECL_ATTRIBUTES (current_function_decl);
1600 a = lookup_attribute ("naked", attr);
1601 if (a != NULL_TREE)
1602 type |= ARM_FT_NAKED;
1604 a = lookup_attribute ("isr", attr);
1605 if (a == NULL_TREE)
1606 a = lookup_attribute ("interrupt", attr);
1608 if (a == NULL_TREE)
1609 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1610 else
1611 type |= arm_isr_value (TREE_VALUE (a));
1613 return type;
1616 /* Returns the type of the current function. */
1618 unsigned long
1619 arm_current_func_type (void)
1621 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1622 cfun->machine->func_type = arm_compute_func_type ();
1624 return cfun->machine->func_type;
1627 bool
1628 arm_allocate_stack_slots_for_args (void)
1630 /* Naked functions should not allocate stack slots for arguments. */
1631 return !IS_NAKED (arm_current_func_type ());
1635 /* Return 1 if it is possible to return using a single instruction.
1636 If SIBLING is non-null, this is a test for a return before a sibling
1637 call. SIBLING is the call insn, so we can examine its register usage. */
1639 int
1640 use_return_insn (int iscond, rtx sibling)
1642 int regno;
1643 unsigned int func_type;
1644 unsigned long saved_int_regs;
1645 unsigned HOST_WIDE_INT stack_adjust;
1646 arm_stack_offsets *offsets;
1648 /* Never use a return instruction before reload has run. */
1649 if (!reload_completed)
1650 return 0;
1652 func_type = arm_current_func_type ();
1654 /* Naked, volatile and stack alignment functions need special
1655 consideration. */
1656 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1657 return 0;
1659 /* So do interrupt functions that use the frame pointer and Thumb
1660 interrupt functions. */
1661 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1662 return 0;
1664 offsets = arm_get_frame_offsets ();
1665 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1667 /* As do variadic functions. */
1668 if (crtl->args.pretend_args_size
1669 || cfun->machine->uses_anonymous_args
1670 /* Or if the function calls __builtin_eh_return () */
1671 || crtl->calls_eh_return
1672 /* Or if the function calls alloca */
1673 || cfun->calls_alloca
1674 /* Or if there is a stack adjustment. However, if the stack pointer
1675 is saved on the stack, we can use a pre-incrementing stack load. */
1676 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1677 && stack_adjust == 4)))
1678 return 0;
1680 saved_int_regs = offsets->saved_regs_mask;
1682 /* Unfortunately, the insn
1684 ldmib sp, {..., sp, ...}
1686 triggers a bug on most SA-110 based devices, such that the stack
1687 pointer won't be correctly restored if the instruction takes a
1688 page fault. We work around this problem by popping r3 along with
1689 the other registers, since that is never slower than executing
1690 another instruction.
1692 We test for !arm_arch5 here, because code for any architecture
1693 less than this could potentially be run on one of the buggy
1694 chips. */
1695 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1697 /* Validate that r3 is a call-clobbered register (always true in
1698 the default abi) ... */
1699 if (!call_used_regs[3])
1700 return 0;
1702 /* ... that it isn't being used for a return value ... */
1703 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1704 return 0;
1706 /* ... or for a tail-call argument ... */
1707 if (sibling)
1709 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1711 if (find_regno_fusage (sibling, USE, 3))
1712 return 0;
1715 /* ... and that there are no call-saved registers in r0-r2
1716 (always true in the default ABI). */
1717 if (saved_int_regs & 0x7)
1718 return 0;
1721 /* Can't be done if interworking with Thumb, and any registers have been
1722 stacked. */
1723 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1724 return 0;
1726 /* On StrongARM, conditional returns are expensive if they aren't
1727 taken and multiple registers have been stacked. */
1728 if (iscond && arm_tune_strongarm)
1730 /* Conditional return when just the LR is stored is a simple
1731 conditional-load instruction; that's not expensive. */
1732 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1733 return 0;
1735 if (flag_pic
1736 && arm_pic_register != INVALID_REGNUM
1737 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1738 return 0;
1741 /* If there are saved registers but the LR isn't saved, then we need
1742 two instructions for the return. */
1743 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1744 return 0;
1746 /* Can't be done if any of the FPA regs are pushed,
1747 since this also requires an insn. */
1748 if (TARGET_HARD_FLOAT && TARGET_FPA)
1749 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1750 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1751 return 0;
1753 /* Likewise VFP regs. */
1754 if (TARGET_HARD_FLOAT && TARGET_VFP)
1755 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1756 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1757 return 0;
1759 if (TARGET_REALLY_IWMMXT)
1760 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1761 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1762 return 0;
1764 return 1;
1767 /* Return TRUE if int I is a valid immediate ARM constant. */
1770 const_ok_for_arm (HOST_WIDE_INT i)
1772 int lowbit;
1774 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1775 be all zero, or all one. */
1776 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1777 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1778 != ((~(unsigned HOST_WIDE_INT) 0)
1779 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1780 return FALSE;
1782 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1784 /* Fast return for 0 and small values. We must do this for zero, since
1785 the code below can't handle that one case. */
1786 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1787 return TRUE;
1789 /* Get the number of trailing zeros. */
1790 lowbit = ffs((int) i) - 1;
1792 /* Only even shifts are allowed in ARM mode so round down to the
1793 nearest even number. */
1794 if (TARGET_ARM)
1795 lowbit &= ~1;
1797 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1798 return TRUE;
1800 if (TARGET_ARM)
1802 /* Allow rotated constants in ARM mode. */
1803 if (lowbit <= 4
1804 && ((i & ~0xc000003f) == 0
1805 || (i & ~0xf000000f) == 0
1806 || (i & ~0xfc000003) == 0))
1807 return TRUE;
1809 else
1811 HOST_WIDE_INT v;
1813 /* Allow repeated pattern. */
1814 v = i & 0xff;
1815 v |= v << 16;
1816 if (i == v || i == (v | (v << 8)))
1817 return TRUE;
1820 return FALSE;
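/* Illustration only: a stand-alone restatement of the ARM-mode test above,
   assuming plain 32-bit unsigned arithmetic.  An ARM data-processing
   immediate is an 8-bit value rotated right by an even amount, so e.g.
   0xff, 0xff000000 and 0xc000003f are accepted while 0x101 and 0xff0000ff
   are not.  (The else branch above additionally accepts the Thumb-2
   replicated byte/halfword patterns.)  */
#if 0
static int
sketch_const_ok_for_arm_mode (unsigned int x)
{
  int rot;

  if ((x & ~0xffu) == 0)
    return 1;		/* Fits in 8 bits with no rotation.  */

  for (rot = 2; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a rotate-right by ROT; if the
	 result fits in 8 bits, X is a valid immediate.  */
      unsigned int field = (x << rot) | (x >> (32 - rot));
      if ((field & ~0xffu) == 0)
	return 1;
    }
  return 0;
}
#endif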
1823 /* Return true if I is a valid constant for the operation CODE. */
1824 static int
1825 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1827 if (const_ok_for_arm (i))
1828 return 1;
1830 switch (code)
1832 case PLUS:
1833 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1835 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1836 case XOR:
1837 case IOR:
1838 return 0;
1840 case AND:
1841 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1843 default:
1844 gcc_unreachable ();
1848 /* Emit a sequence of insns to handle a large constant.
1849 CODE is the code of the operation required, it can be any of SET, PLUS,
1850 IOR, AND, XOR, MINUS;
1851 MODE is the mode in which the operation is being performed;
1852 VAL is the integer to operate on;
1853 SOURCE is the other operand (a register, or a null-pointer for SET);
1854 SUBTARGETS means it is safe to create scratch registers if that will
1855 either produce a simpler sequence, or we will want to cse the values.
1856 Return value is the number of insns emitted. */
1858 /* ??? Tweak this for thumb2. */
1860 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1861 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1863 rtx cond;
1865 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1866 cond = COND_EXEC_TEST (PATTERN (insn));
1867 else
1868 cond = NULL_RTX;
1870 if (subtargets || code == SET
1871 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1872 && REGNO (target) != REGNO (source)))
1874 /* After arm_reorg has been called, we can't fix up expensive
1875 constants by pushing them into memory so we must synthesize
1876 them in-line, regardless of the cost. This is only likely to
1877 be more costly on chips that have load delay slots and we are
1878 compiling without running the scheduler (so no splitting
1879 occurred before the final instruction emission).
1881 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c */
1883 if (!after_arm_reorg
1884 && !cond
1885 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1886 1, 0)
1887 > arm_constant_limit + (code != SET)))
1889 if (code == SET)
1891 /* Currently SET is the only monadic value for CODE; all
1892 the rest are dyadic. */
1893 emit_set_insn (target, GEN_INT (val));
1894 return 1;
1896 else
1898 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1900 emit_set_insn (temp, GEN_INT (val));
1901 /* For MINUS, the constant is the value being subtracted from,
1902 since we never have subtraction of a constant. */
1903 if (code == MINUS)
1904 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1905 else
1906 emit_set_insn (target,
1907 gen_rtx_fmt_ee (code, mode, source, temp));
1908 return 2;
1913 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1917 /* Return the number of ARM instructions required to synthesize the given
1918 constant. */
1919 static int
1920 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1922 HOST_WIDE_INT temp1;
1923 int num_insns = 0;
1926 int end;
1928 if (i <= 0)
1929 i += 32;
1930 if (remainder & (3 << (i - 2)))
1932 end = i - 8;
1933 if (end < 0)
1934 end += 32;
1935 temp1 = remainder & ((0x0ff << end)
1936 | ((i < end) ? (0xff >> (32 - end)) : 0));
1937 remainder &= ~temp1;
1938 num_insns++;
1939 i -= 6;
1941 i -= 2;
1942 } while (remainder);
1943 return num_insns;
1946 /* Emit an instruction with the indicated PATTERN. If COND is
1947 non-NULL, conditionalize the execution of the instruction on COND
1948 being true. */
1950 static void
1951 emit_constant_insn (rtx cond, rtx pattern)
1953 if (cond)
1954 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1955 emit_insn (pattern);
1958 /* As above, but extra parameter GENERATE which, if clear, suppresses
1959 RTL generation. */
1960 /* ??? This needs more work for thumb2. */
1962 static int
1963 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1964 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1965 int generate)
1967 int can_invert = 0;
1968 int can_negate = 0;
1969 int can_negate_initial = 0;
1970 int can_shift = 0;
1971 int i;
1972 int num_bits_set = 0;
1973 int set_sign_bit_copies = 0;
1974 int clear_sign_bit_copies = 0;
1975 int clear_zero_bit_copies = 0;
1976 int set_zero_bit_copies = 0;
1977 int insns = 0;
1978 unsigned HOST_WIDE_INT temp1, temp2;
1979 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1981 /* Find out which operations are safe for a given CODE. Also do a quick
1982 check for degenerate cases; these can occur when DImode operations
1983 are split. */
1984 switch (code)
1986 case SET:
1987 can_invert = 1;
1988 can_shift = 1;
1989 can_negate = 1;
1990 break;
1992 case PLUS:
1993 can_negate = 1;
1994 can_negate_initial = 1;
1995 break;
1997 case IOR:
1998 if (remainder == 0xffffffff)
2000 if (generate)
2001 emit_constant_insn (cond,
2002 gen_rtx_SET (VOIDmode, target,
2003 GEN_INT (ARM_SIGN_EXTEND (val))));
2004 return 1;
2006 if (remainder == 0)
2008 if (reload_completed && rtx_equal_p (target, source))
2009 return 0;
2010 if (generate)
2011 emit_constant_insn (cond,
2012 gen_rtx_SET (VOIDmode, target, source));
2013 return 1;
2015 break;
2017 case AND:
2018 if (remainder == 0)
2020 if (generate)
2021 emit_constant_insn (cond,
2022 gen_rtx_SET (VOIDmode, target, const0_rtx));
2023 return 1;
2025 if (remainder == 0xffffffff)
2027 if (reload_completed && rtx_equal_p (target, source))
2028 return 0;
2029 if (generate)
2030 emit_constant_insn (cond,
2031 gen_rtx_SET (VOIDmode, target, source));
2032 return 1;
2034 can_invert = 1;
2035 break;
2037 case XOR:
2038 if (remainder == 0)
2040 if (reload_completed && rtx_equal_p (target, source))
2041 return 0;
2042 if (generate)
2043 emit_constant_insn (cond,
2044 gen_rtx_SET (VOIDmode, target, source));
2045 return 1;
2048 /* We don't know how to handle other cases yet. */
2049 gcc_assert (remainder == 0xffffffff);
2051 if (generate)
2052 emit_constant_insn (cond,
2053 gen_rtx_SET (VOIDmode, target,
2054 gen_rtx_NOT (mode, source)));
2055 return 1;
2057 case MINUS:
2058 /* We treat MINUS as (val - source), since (source - val) is always
2059 passed as (source + (-val)). */
2060 if (remainder == 0)
2062 if (generate)
2063 emit_constant_insn (cond,
2064 gen_rtx_SET (VOIDmode, target,
2065 gen_rtx_NEG (mode, source)));
2066 return 1;
2068 if (const_ok_for_arm (val))
2070 if (generate)
2071 emit_constant_insn (cond,
2072 gen_rtx_SET (VOIDmode, target,
2073 gen_rtx_MINUS (mode, GEN_INT (val),
2074 source)));
2075 return 1;
2077 can_negate = 1;
2079 break;
2081 default:
2082 gcc_unreachable ();
2085 /* If we can do it in one insn get out quickly. */
2086 if (const_ok_for_arm (val)
2087 || (can_negate_initial && const_ok_for_arm (-val))
2088 || (can_invert && const_ok_for_arm (~val)))
2090 if (generate)
2091 emit_constant_insn (cond,
2092 gen_rtx_SET (VOIDmode, target,
2093 (source
2094 ? gen_rtx_fmt_ee (code, mode, source,
2095 GEN_INT (val))
2096 : GEN_INT (val))));
2097 return 1;
2100 /* Calculate a few attributes that may be useful for specific
2101 optimizations. */
2102 for (i = 31; i >= 0; i--)
2104 if ((remainder & (1 << i)) == 0)
2105 clear_sign_bit_copies++;
2106 else
2107 break;
2110 for (i = 31; i >= 0; i--)
2112 if ((remainder & (1 << i)) != 0)
2113 set_sign_bit_copies++;
2114 else
2115 break;
2118 for (i = 0; i <= 31; i++)
2120 if ((remainder & (1 << i)) == 0)
2121 clear_zero_bit_copies++;
2122 else
2123 break;
2126 for (i = 0; i <= 31; i++)
2128 if ((remainder & (1 << i)) != 0)
2129 set_zero_bit_copies++;
2130 else
2131 break;
2134 switch (code)
2136 case SET:
2137 /* See if we can use movw. */
2138 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2140 if (generate)
2141 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2142 GEN_INT (val)));
2143 return 1;
2146 /* See if we can do this by sign-extending a constant that is known
2147 to be negative. This is a good way of doing it, since the shift
2148 may well merge into a subsequent insn. */
2149 if (set_sign_bit_copies > 1)
2151 if (const_ok_for_arm
2152 (temp1 = ARM_SIGN_EXTEND (remainder
2153 << (set_sign_bit_copies - 1))))
2155 if (generate)
2157 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2158 emit_constant_insn (cond,
2159 gen_rtx_SET (VOIDmode, new_src,
2160 GEN_INT (temp1)));
2161 emit_constant_insn (cond,
2162 gen_ashrsi3 (target, new_src,
2163 GEN_INT (set_sign_bit_copies - 1)));
2165 return 2;
2167 /* For an inverted constant, we will need to set the low bits;
2168 these will be shifted out of harm's way. */
2169 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2170 if (const_ok_for_arm (~temp1))
2172 if (generate)
2174 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2175 emit_constant_insn (cond,
2176 gen_rtx_SET (VOIDmode, new_src,
2177 GEN_INT (temp1)));
2178 emit_constant_insn (cond,
2179 gen_ashrsi3 (target, new_src,
2180 GEN_INT (set_sign_bit_copies - 1)));
2182 return 2;
2186 /* See if we can calculate the value as the difference between two
2187 valid immediates. */
2188 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2190 int topshift = clear_sign_bit_copies & ~1;
2192 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2193 & (0xff000000 >> topshift));
2195 /* If temp1 is zero, then that means the 9 most significant
2196 bits of remainder were 1 and we've caused it to overflow.
2197 When topshift is 0 we don't need to do anything since we
2198 can borrow from 'bit 32'. */
2199 if (temp1 == 0 && topshift != 0)
2200 temp1 = 0x80000000 >> (topshift - 1);
2202 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2204 if (const_ok_for_arm (temp2))
2206 if (generate)
2208 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2209 emit_constant_insn (cond,
2210 gen_rtx_SET (VOIDmode, new_src,
2211 GEN_INT (temp1)));
2212 emit_constant_insn (cond,
2213 gen_addsi3 (target, new_src,
2214 GEN_INT (-temp2)));
2217 return 2;
2221 /* See if we can generate this by setting the bottom (or the top)
2222 16 bits, and then shifting these into the other half of the
2223 word. We only look for the simplest cases, to do more would cost
2224 too much. Be careful, however, not to generate this when the
2225 alternative would take fewer insns. */
2226 if (val & 0xffff0000)
2228 temp1 = remainder & 0xffff0000;
2229 temp2 = remainder & 0x0000ffff;
2231 /* Overlaps outside this range are best done using other methods. */
2232 for (i = 9; i < 24; i++)
2234 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2235 && !const_ok_for_arm (temp2))
2237 rtx new_src = (subtargets
2238 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2239 : target);
2240 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2241 source, subtargets, generate);
2242 source = new_src;
2243 if (generate)
2244 emit_constant_insn
2245 (cond,
2246 gen_rtx_SET
2247 (VOIDmode, target,
2248 gen_rtx_IOR (mode,
2249 gen_rtx_ASHIFT (mode, source,
2250 GEN_INT (i)),
2251 source)));
2252 return insns + 1;
2256 /* Don't duplicate cases already considered. */
2257 for (i = 17; i < 24; i++)
2259 if (((temp1 | (temp1 >> i)) == remainder)
2260 && !const_ok_for_arm (temp1))
2262 rtx new_src = (subtargets
2263 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2264 : target);
2265 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2266 source, subtargets, generate);
2267 source = new_src;
2268 if (generate)
2269 emit_constant_insn
2270 (cond,
2271 gen_rtx_SET (VOIDmode, target,
2272 gen_rtx_IOR
2273 (mode,
2274 gen_rtx_LSHIFTRT (mode, source,
2275 GEN_INT (i)),
2276 source)));
2277 return insns + 1;
2281 break;
2283 case IOR:
2284 case XOR:
2285 /* If we have IOR or XOR, and the constant can be loaded in a
2286 single instruction, and we can find a temporary to put it in,
2287 then this can be done in two instructions instead of 3-4. */
2288 if (subtargets
2289 /* TARGET can't be NULL if SUBTARGETS is 0 */
2290 || (reload_completed && !reg_mentioned_p (target, source)))
2292 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2294 if (generate)
2296 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2298 emit_constant_insn (cond,
2299 gen_rtx_SET (VOIDmode, sub,
2300 GEN_INT (val)));
2301 emit_constant_insn (cond,
2302 gen_rtx_SET (VOIDmode, target,
2303 gen_rtx_fmt_ee (code, mode,
2304 source, sub)));
2306 return 2;
2310 if (code == XOR)
2311 break;
2313 if (set_sign_bit_copies > 8
2314 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2316 if (generate)
2318 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2319 rtx shift = GEN_INT (set_sign_bit_copies);
2321 emit_constant_insn
2322 (cond,
2323 gen_rtx_SET (VOIDmode, sub,
2324 gen_rtx_NOT (mode,
2325 gen_rtx_ASHIFT (mode,
2326 source,
2327 shift))));
2328 emit_constant_insn
2329 (cond,
2330 gen_rtx_SET (VOIDmode, target,
2331 gen_rtx_NOT (mode,
2332 gen_rtx_LSHIFTRT (mode, sub,
2333 shift))));
2335 return 2;
2338 if (set_zero_bit_copies > 8
2339 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2341 if (generate)
2343 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2344 rtx shift = GEN_INT (set_zero_bit_copies);
2346 emit_constant_insn
2347 (cond,
2348 gen_rtx_SET (VOIDmode, sub,
2349 gen_rtx_NOT (mode,
2350 gen_rtx_LSHIFTRT (mode,
2351 source,
2352 shift))));
2353 emit_constant_insn
2354 (cond,
2355 gen_rtx_SET (VOIDmode, target,
2356 gen_rtx_NOT (mode,
2357 gen_rtx_ASHIFT (mode, sub,
2358 shift))));
2360 return 2;
2363 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2365 if (generate)
2367 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2368 emit_constant_insn (cond,
2369 gen_rtx_SET (VOIDmode, sub,
2370 gen_rtx_NOT (mode, source)));
2371 source = sub;
2372 if (subtargets)
2373 sub = gen_reg_rtx (mode);
2374 emit_constant_insn (cond,
2375 gen_rtx_SET (VOIDmode, sub,
2376 gen_rtx_AND (mode, source,
2377 GEN_INT (temp1))));
2378 emit_constant_insn (cond,
2379 gen_rtx_SET (VOIDmode, target,
2380 gen_rtx_NOT (mode, sub)));
2382 return 3;
2384 break;
2386 case AND:
2387 /* See if two shifts will do 2 or more insns' worth of work. */
2388 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2390 HOST_WIDE_INT shift_mask = ((0xffffffff
2391 << (32 - clear_sign_bit_copies))
2392 & 0xffffffff);
2394 if ((remainder | shift_mask) != 0xffffffff)
2396 if (generate)
2398 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2399 insns = arm_gen_constant (AND, mode, cond,
2400 remainder | shift_mask,
2401 new_src, source, subtargets, 1);
2402 source = new_src;
2404 else
2406 rtx targ = subtargets ? NULL_RTX : target;
2407 insns = arm_gen_constant (AND, mode, cond,
2408 remainder | shift_mask,
2409 targ, source, subtargets, 0);
2413 if (generate)
2415 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2416 rtx shift = GEN_INT (clear_sign_bit_copies);
2418 emit_insn (gen_ashlsi3 (new_src, source, shift));
2419 emit_insn (gen_lshrsi3 (target, new_src, shift));
2422 return insns + 2;
2425 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2427 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2429 if ((remainder | shift_mask) != 0xffffffff)
2431 if (generate)
2433 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2435 insns = arm_gen_constant (AND, mode, cond,
2436 remainder | shift_mask,
2437 new_src, source, subtargets, 1);
2438 source = new_src;
2440 else
2442 rtx targ = subtargets ? NULL_RTX : target;
2444 insns = arm_gen_constant (AND, mode, cond,
2445 remainder | shift_mask,
2446 targ, source, subtargets, 0);
2450 if (generate)
2452 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2453 rtx shift = GEN_INT (clear_zero_bit_copies);
2455 emit_insn (gen_lshrsi3 (new_src, source, shift));
2456 emit_insn (gen_ashlsi3 (target, new_src, shift));
2459 return insns + 2;
2462 break;
2464 default:
2465 break;
2468 for (i = 0; i < 32; i++)
2469 if (remainder & (1 << i))
2470 num_bits_set++;
2472 if (code == AND || (can_invert && num_bits_set > 16))
2473 remainder = (~remainder) & 0xffffffff;
2474 else if (code == PLUS && num_bits_set > 16)
2475 remainder = (-remainder) & 0xffffffff;
2476 else
2478 can_invert = 0;
2479 can_negate = 0;
2482 /* Now try to find a way of doing the job in either two or three
2483 instructions.
2484 We start by looking for the largest block of zeros that are aligned on
2485 a 2-bit boundary; we then fill up the temps, wrapping around to the
2486 top of the word when we drop off the bottom.
2487 In the worst case this code should produce no more than four insns.
2488 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2489 best place to start. */
2491 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2492 the same. */
2494 int best_start = 0;
2495 if (!TARGET_THUMB2)
2497 int best_consecutive_zeros = 0;
2499 for (i = 0; i < 32; i += 2)
2501 int consecutive_zeros = 0;
2503 if (!(remainder & (3 << i)))
2505 while ((i < 32) && !(remainder & (3 << i)))
2507 consecutive_zeros += 2;
2508 i += 2;
2510 if (consecutive_zeros > best_consecutive_zeros)
2512 best_consecutive_zeros = consecutive_zeros;
2513 best_start = i - consecutive_zeros;
2515 i -= 2;
2519 /* So long as it won't require any more insns to do so, it's
2520 desirable to emit a small constant (in bits 0...9) in the last
2521 insn. This way there is more chance that it can be combined with
2522 a later addressing insn to form a pre-indexed load or store
2523 operation. Consider:
2525 *((volatile int *)0xe0000100) = 1;
2526 *((volatile int *)0xe0000110) = 2;
2528 We want this to wind up as:
2530 mov rA, #0xe0000000
2531 mov rB, #1
2532 str rB, [rA, #0x100]
2533 mov rB, #2
2534 str rB, [rA, #0x110]
2536 rather than having to synthesize both large constants from scratch.
2538 Therefore, we calculate how many insns would be required to emit
2539 the constant starting from `best_start', and also starting from
2540 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2541 yield a shorter sequence, we may as well use zero. */
2542 if (best_start != 0
2543 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2544 && (count_insns_for_constant (remainder, 0) <=
2545 count_insns_for_constant (remainder, best_start)))
2546 best_start = 0;
2549 /* Now start emitting the insns. */
2550 i = best_start;
2553 int end;
2555 if (i <= 0)
2556 i += 32;
2557 if (remainder & (3 << (i - 2)))
2559 end = i - 8;
2560 if (end < 0)
2561 end += 32;
2562 temp1 = remainder & ((0x0ff << end)
2563 | ((i < end) ? (0xff >> (32 - end)) : 0));
2564 remainder &= ~temp1;
2566 if (generate)
2568 rtx new_src, temp1_rtx;
2570 if (code == SET || code == MINUS)
2572 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2573 if (can_invert && code != MINUS)
2574 temp1 = ~temp1;
2576 else
2578 if (remainder && subtargets)
2579 new_src = gen_reg_rtx (mode);
2580 else
2581 new_src = target;
2582 if (can_invert)
2583 temp1 = ~temp1;
2584 else if (can_negate)
2585 temp1 = -temp1;
2588 temp1 = trunc_int_for_mode (temp1, mode);
2589 temp1_rtx = GEN_INT (temp1);
2591 if (code == SET)
2593 else if (code == MINUS)
2594 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2595 else
2596 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2598 emit_constant_insn (cond,
2599 gen_rtx_SET (VOIDmode, new_src,
2600 temp1_rtx));
2601 source = new_src;
2604 if (code == SET)
2606 can_invert = 0;
2607 code = PLUS;
2609 else if (code == MINUS)
2610 code = PLUS;
2612 insns++;
2613 if (TARGET_ARM)
2614 i -= 6;
2615 else
2616 i -= 7;
2618 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
2619 shifts. */
2620 if (TARGET_ARM)
2621 i -= 2;
2622 else
2623 i--;
2625 while (remainder);
2628 return insns;
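/* Illustration only: a much simplified, ARM-mode flavour of the chunking
   loop above, ignoring inversion, negation, the best_start heuristic and
   field wrap-around at bit 31.  For example 0x00ffff00 splits into the two
   valid immediates 0x00ff0000 and 0x0000ff00; arm_gen_constant also needs
   two instructions for that value (it happens to prefer the
   difference-of-two-immediates form, 0x01000000 minus 0x100).  */
#if 0
static int
sketch_count_chunks (unsigned int remainder)
{
  int insns = 0;
  int i = 0;

  while (remainder)
    {
      /* Step over pairs of zero bits, then peel off one 8-bit field that
	 starts on an even bit position.  */
      while ((remainder & (3u << i)) == 0)
	i = (i + 2) & 31;
      remainder &= ~(0xffu << i);
      insns++;
    }
  return insns;
}
#endif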
2631 /* Canonicalize a comparison so that we are more likely to recognize it.
2632 This can be done for a few constant compares, where we can make the
2633 immediate value easier to load. */
2635 enum rtx_code
2636 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2637 rtx * op1)
2639 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2640 unsigned HOST_WIDE_INT maxval;
2641 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2643 switch (code)
2645 case EQ:
2646 case NE:
2647 return code;
2649 case GT:
2650 case LE:
2651 if (i != maxval
2652 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2654 *op1 = GEN_INT (i + 1);
2655 return code == GT ? GE : LT;
2657 break;
2659 case GE:
2660 case LT:
2661 if (i != ~maxval
2662 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2664 *op1 = GEN_INT (i - 1);
2665 return code == GE ? GT : LE;
2667 break;
2669 case GTU:
2670 case LEU:
2671 if (i != ~((unsigned HOST_WIDE_INT) 0)
2672 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2674 *op1 = GEN_INT (i + 1);
2675 return code == GTU ? GEU : LTU;
2677 break;
2679 case GEU:
2680 case LTU:
2681 if (i != 0
2682 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2684 *op1 = GEN_INT (i - 1);
2685 return code == GEU ? GTU : LEU;
2687 break;
2689 default:
2690 gcc_unreachable ();
2693 return code;
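/* Illustration only: the GT -> GE leg of the transform above in isolation
   (the real function also tries the negated constant and guards against
   overflow at the mode's maximum value).  For instance 0xfff is not a
   valid ARM immediate, but x > 0xfff is equivalent to x >= 0x1000 and
   0x1000 is encodable, so the comparison code and constant are rewritten
   together.  */
#if 0
static enum rtx_code
sketch_canonicalize_gt (enum rtx_code code, HOST_WIDE_INT *imm)
{
  if (code == GT && const_ok_for_arm (*imm + 1))
    {
      *imm += 1;
      return GE;
    }
  return code;
}
#endif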
2697 /* Define how to find the value returned by a function. */
2700 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2702 enum machine_mode mode;
2703 int unsignedp ATTRIBUTE_UNUSED;
2704 rtx r ATTRIBUTE_UNUSED;
2706 mode = TYPE_MODE (type);
2707 /* Promote integer types. */
2708 if (INTEGRAL_TYPE_P (type))
2709 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2711 /* Promotes small structs returned in a register to full-word size
2712 for big-endian AAPCS. */
2713 if (arm_return_in_msb (type))
2715 HOST_WIDE_INT size = int_size_in_bytes (type);
2716 if (size % UNITS_PER_WORD != 0)
2718 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2719 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2723 return LIBCALL_VALUE(mode);
2726 /* Determine the amount of memory needed to store the possible return
2727 registers of an untyped call. */
2729 arm_apply_result_size (void)
2731 int size = 16;
2733 if (TARGET_ARM)
2735 if (TARGET_HARD_FLOAT_ABI)
2737 if (TARGET_FPA)
2738 size += 12;
2739 if (TARGET_MAVERICK)
2740 size += 8;
2742 if (TARGET_IWMMXT_ABI)
2743 size += 8;
2746 return size;
2749 /* Decide whether a type should be returned in memory (true)
2750 or in a register (false). This is called by the macro
2751 TARGET_RETURN_IN_MEMORY. */
2752 bool
2753 arm_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2755 HOST_WIDE_INT size;
2757 size = int_size_in_bytes (type);
2759 /* Vector values should be returned using ARM registers, not memory (unless
2760 they're over 16 bytes, which will break since we only have four
2761 call-clobbered registers to play with). */
2762 if (TREE_CODE (type) == VECTOR_TYPE)
2763 return (size < 0 || size > (4 * UNITS_PER_WORD));
2765 if (!AGGREGATE_TYPE_P (type) &&
2766 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2767 /* All simple types are returned in registers.
2768 For AAPCS, complex types are treated the same as aggregates. */
2769 return 0;
2771 if (arm_abi != ARM_ABI_APCS)
2773 /* ATPCS and later return aggregate types in memory only if they are
2774 larger than a word (or are variable size). */
2775 return (size < 0 || size > UNITS_PER_WORD);
2778 /* For the arm-wince targets we choose to be compatible with Microsoft's
2779 ARM and Thumb compilers, which always return aggregates in memory. */
2780 #ifndef ARM_WINCE
2781 /* All structures/unions bigger than one word are returned in memory.
2782 Also catch the case where int_size_in_bytes returns -1. In this case
2783 the aggregate is either huge or of variable size, and in either case
2784 we will want to return it via memory and not in a register. */
2785 if (size < 0 || size > UNITS_PER_WORD)
2786 return 1;
2788 if (TREE_CODE (type) == RECORD_TYPE)
2790 tree field;
2792 /* For a struct the APCS says that we only return in a register
2793 if the type is 'integer like' and every addressable element
2794 has an offset of zero. For practical purposes this means
2795 that the structure can have at most one non bit-field element
2796 and that this element must be the first one in the structure. */
2798 /* Find the first field, ignoring non FIELD_DECL things which will
2799 have been created by C++. */
2800 for (field = TYPE_FIELDS (type);
2801 field && TREE_CODE (field) != FIELD_DECL;
2802 field = TREE_CHAIN (field))
2803 continue;
2805 if (field == NULL)
2806 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2808 /* Check that the first field is valid for returning in a register. */
2810 /* ... Floats are not allowed */
2811 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2812 return 1;
2814 /* ... Aggregates that are not themselves valid for returning in
2815 a register are not allowed. */
2816 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
2817 return 1;
2819 /* Now check the remaining fields, if any. Only bitfields are allowed,
2820 since they are not addressable. */
2821 for (field = TREE_CHAIN (field);
2822 field;
2823 field = TREE_CHAIN (field))
2825 if (TREE_CODE (field) != FIELD_DECL)
2826 continue;
2828 if (!DECL_BIT_FIELD_TYPE (field))
2829 return 1;
2832 return 0;
2835 if (TREE_CODE (type) == UNION_TYPE)
2837 tree field;
2839 /* Unions can be returned in registers if every element is
2840 integral, or can be returned in an integer register. */
2841 for (field = TYPE_FIELDS (type);
2842 field;
2843 field = TREE_CHAIN (field))
2845 if (TREE_CODE (field) != FIELD_DECL)
2846 continue;
2848 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2849 return 1;
2851 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
2852 return 1;
2855 return 0;
2857 #endif /* not ARM_WINCE */
2859 /* Return all other types in memory. */
2860 return 1;
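/* Illustration only (hypothetical user code): under the APCS rules above,
   the first two structs below come back in r0, while the last two go via
   memory.  (The ATPCS/AAPCS path earlier in this function looks only at
   the size, so there struct d would also come back in r0.)  */
#if 0
struct a { int x; };			 /* one word, integer-like  */
struct b { unsigned hi : 16, lo : 16; }; /* word-sized, only bit-fields  */
struct c { int x, y; };			 /* larger than a word  */
struct d { float f; };			 /* first field is a float  */
#endif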
2863 /* Indicate whether or not words of a double are in big-endian order. */
2866 arm_float_words_big_endian (void)
2868 if (TARGET_MAVERICK)
2869 return 0;
2871 /* For FPA, float words are always big-endian. For VFP, float words
2872 follow the memory system mode. */
2874 if (TARGET_FPA)
2876 return 1;
2879 if (TARGET_VFP)
2880 return (TARGET_BIG_END ? 1 : 0);
2882 return 1;
2885 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2886 for a call to a function whose data type is FNTYPE.
2887 For a library call, FNTYPE is NULL. */
2888 void
2889 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2890 rtx libname ATTRIBUTE_UNUSED,
2891 tree fndecl ATTRIBUTE_UNUSED)
2893 /* On the ARM, the offset starts at 0. */
2894 pcum->nregs = 0;
2895 pcum->iwmmxt_nregs = 0;
2896 pcum->can_split = true;
2898 /* Varargs vectors are treated the same as long long.
2899 named_count avoids having to change the way arm handles 'named' */
2900 pcum->named_count = 0;
2901 pcum->nargs = 0;
2903 if (TARGET_REALLY_IWMMXT && fntype)
2905 tree fn_arg;
2907 for (fn_arg = TYPE_ARG_TYPES (fntype);
2908 fn_arg;
2909 fn_arg = TREE_CHAIN (fn_arg))
2910 pcum->named_count += 1;
2912 if (! pcum->named_count)
2913 pcum->named_count = INT_MAX;
2918 /* Return true if mode/type need doubleword alignment. */
2919 bool
2920 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2922 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2923 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2927 /* Determine where to put an argument to a function.
2928 Value is zero to push the argument on the stack,
2929 or a hard register in which to store the argument.
2931 MODE is the argument's machine mode.
2932 TYPE is the data type of the argument (as a tree).
2933 This is null for libcalls where that information may
2934 not be available.
2935 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2936 the preceding args and about the function being called.
2937 NAMED is nonzero if this argument is a named parameter
2938 (otherwise it is an extra parameter matching an ellipsis). */
2941 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2942 tree type, int named)
2944 int nregs;
2946 /* Varargs vectors are treated the same as long long.
2947 named_count avoids having to change the way arm handles 'named' */
2948 if (TARGET_IWMMXT_ABI
2949 && arm_vector_mode_supported_p (mode)
2950 && pcum->named_count > pcum->nargs + 1)
2952 if (pcum->iwmmxt_nregs <= 9)
2953 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2954 else
2956 pcum->can_split = false;
2957 return NULL_RTX;
2961 /* Put doubleword aligned quantities in even register pairs. */
2962 if (pcum->nregs & 1
2963 && ARM_DOUBLEWORD_ALIGN
2964 && arm_needs_doubleword_align (mode, type))
2965 pcum->nregs++;
2967 if (mode == VOIDmode)
2968 /* Pick an arbitrary value for operand 2 of the call insn. */
2969 return const0_rtx;
2971 /* Only allow splitting an arg between regs and memory if all preceding
2972 args were allocated to regs. For args passed by reference we only count
2973 the reference pointer. */
2974 if (pcum->can_split)
2975 nregs = 1;
2976 else
2977 nregs = ARM_NUM_REGS2 (mode, type);
2979 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2980 return NULL_RTX;
2982 return gen_rtx_REG (mode, pcum->nregs);
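/* Illustration only (hypothetical user code): with ARM_DOUBLEWORD_ALIGN
   (the AAPCS-style configuration), the even-pair rule above means that for
   the prototype below A lands in r0, the register counter is bumped past
   r1 so that B occupies r2/r3, and C overflows to the stack.  */
#if 0
void f (int a, long long b, int c);
#endif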
2985 static int
2986 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2987 tree type, bool named ATTRIBUTE_UNUSED)
2989 int nregs = pcum->nregs;
2991 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2992 return 0;
2994 if (NUM_ARG_REGS > nregs
2995 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2996 && pcum->can_split)
2997 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2999 return 0;
3002 /* Variable sized types are passed by reference. This is a GCC
3003 extension to the ARM ABI. */
3005 static bool
3006 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3007 enum machine_mode mode ATTRIBUTE_UNUSED,
3008 const_tree type, bool named ATTRIBUTE_UNUSED)
3010 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3013 /* Encode the current state of the #pragma [no_]long_calls. */
3014 typedef enum
3016 OFF, /* No #pragma [no_]long_calls is in effect. */
3017 LONG, /* #pragma long_calls is in effect. */
3018 SHORT /* #pragma no_long_calls is in effect. */
3019 } arm_pragma_enum;
3021 static arm_pragma_enum arm_pragma_long_calls = OFF;
3023 void
3024 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3026 arm_pragma_long_calls = LONG;
3029 void
3030 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3032 arm_pragma_long_calls = SHORT;
3035 void
3036 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3038 arm_pragma_long_calls = OFF;
3041 /* Table of machine attributes. */
3042 const struct attribute_spec arm_attribute_table[] =
3044 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3045 /* Function calls made to this symbol must be done indirectly, because
3046 it may lie outside of the 26 bit addressing range of a normal function
3047 call. */
3048 { "long_call", 0, 0, false, true, true, NULL },
3049 /* Whereas these functions are always known to reside within the 26 bit
3050 addressing range. */
3051 { "short_call", 0, 0, false, true, true, NULL },
3052 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3053 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3054 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3055 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3056 #ifdef ARM_PE
3057 /* ARM/PE has three new attributes:
3058 interfacearm - ?
3059 dllexport - for exporting a function/variable that will live in a dll
3060 dllimport - for importing a function/variable from a dll
3062 Microsoft allows multiple declspecs in one __declspec, separating
3063 them with spaces. We do NOT support this. Instead, use __declspec
3064 multiple times. */
3066 { "dllimport", 0, 0, true, false, false, NULL },
3067 { "dllexport", 0, 0, true, false, false, NULL },
3068 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3069 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3070 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3071 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3072 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3073 #endif
3074 { NULL, 0, 0, false, false, false, NULL }
3077 /* Handle an attribute requiring a FUNCTION_DECL;
3078 arguments as in struct attribute_spec.handler. */
3079 static tree
3080 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3081 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3083 if (TREE_CODE (*node) != FUNCTION_DECL)
3085 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3086 IDENTIFIER_POINTER (name));
3087 *no_add_attrs = true;
3090 return NULL_TREE;
3093 /* Handle an "interrupt" or "isr" attribute;
3094 arguments as in struct attribute_spec.handler. */
3095 static tree
3096 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3097 bool *no_add_attrs)
3099 if (DECL_P (*node))
3101 if (TREE_CODE (*node) != FUNCTION_DECL)
3103 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3104 IDENTIFIER_POINTER (name));
3105 *no_add_attrs = true;
3107 /* FIXME: the argument if any is checked for type attributes;
3108 should it be checked for decl ones? */
3110 else
3112 if (TREE_CODE (*node) == FUNCTION_TYPE
3113 || TREE_CODE (*node) == METHOD_TYPE)
3115 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3117 warning (OPT_Wattributes, "%qs attribute ignored",
3118 IDENTIFIER_POINTER (name));
3119 *no_add_attrs = true;
3122 else if (TREE_CODE (*node) == POINTER_TYPE
3123 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3124 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3125 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3127 *node = build_variant_type_copy (*node);
3128 TREE_TYPE (*node) = build_type_attribute_variant
3129 (TREE_TYPE (*node),
3130 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3131 *no_add_attrs = true;
3133 else
3135 /* Possibly pass this attribute on from the type to a decl. */
3136 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3137 | (int) ATTR_FLAG_FUNCTION_NEXT
3138 | (int) ATTR_FLAG_ARRAY_NEXT))
3140 *no_add_attrs = true;
3141 return tree_cons (name, args, NULL_TREE);
3143 else
3145 warning (OPT_Wattributes, "%qs attribute ignored",
3146 IDENTIFIER_POINTER (name));
3151 return NULL_TREE;
3154 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3155 /* Handle the "notshared" attribute. This attribute is another way of
3156 requesting hidden visibility. ARM's compiler supports
3157 "__declspec(notshared)"; we support the same thing via an
3158 attribute. */
3160 static tree
3161 arm_handle_notshared_attribute (tree *node,
3162 tree name ATTRIBUTE_UNUSED,
3163 tree args ATTRIBUTE_UNUSED,
3164 int flags ATTRIBUTE_UNUSED,
3165 bool *no_add_attrs)
3167 tree decl = TYPE_NAME (*node);
3169 if (decl)
3171 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3172 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3173 *no_add_attrs = false;
3175 return NULL_TREE;
3177 #endif
3179 /* Return 0 if the attributes for two types are incompatible, 1 if they
3180 are compatible, and 2 if they are nearly compatible (which causes a
3181 warning to be generated). */
3182 static int
3183 arm_comp_type_attributes (const_tree type1, const_tree type2)
3185 int l1, l2, s1, s2;
3187 /* Check for mismatch of non-default calling convention. */
3188 if (TREE_CODE (type1) != FUNCTION_TYPE)
3189 return 1;
3191 /* Check for mismatched call attributes. */
3192 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3193 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3194 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3195 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3197 /* Only bother to check if an attribute is defined. */
3198 if (l1 | l2 | s1 | s2)
3200 /* If one type has an attribute, the other must have the same attribute. */
3201 if ((l1 != l2) || (s1 != s2))
3202 return 0;
3204 /* Disallow mixed attributes. */
3205 if ((l1 & s2) || (l2 & s1))
3206 return 0;
3209 /* Check for mismatched ISR attribute. */
3210 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3211 if (! l1)
3212 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3213 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3214 if (! l2)
3215 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3216 if (l1 != l2)
3217 return 0;
3219 return 1;
3222 /* Assigns default attributes to newly defined type. This is used to
3223 set short_call/long_call attributes for function types of
3224 functions defined inside corresponding #pragma scopes. */
3225 static void
3226 arm_set_default_type_attributes (tree type)
3228 /* Add __attribute__ ((long_call)) to all functions when inside
3229 #pragma long_calls, or __attribute__ ((short_call)) when inside
3230 #pragma no_long_calls. */
3231 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3233 tree type_attr_list, attr_name;
3234 type_attr_list = TYPE_ATTRIBUTES (type);
3236 if (arm_pragma_long_calls == LONG)
3237 attr_name = get_identifier ("long_call");
3238 else if (arm_pragma_long_calls == SHORT)
3239 attr_name = get_identifier ("short_call");
3240 else
3241 return;
3243 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3244 TYPE_ATTRIBUTES (type) = type_attr_list;
3248 /* Return true if DECL is known to be linked into section SECTION. */
3250 static bool
3251 arm_function_in_section_p (tree decl, section *section)
3253 /* We can only be certain about functions defined in the same
3254 compilation unit. */
3255 if (!TREE_STATIC (decl))
3256 return false;
3258 /* Make sure that SYMBOL always binds to the definition in this
3259 compilation unit. */
3260 if (!targetm.binds_local_p (decl))
3261 return false;
3263 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3264 if (!DECL_SECTION_NAME (decl))
3266 /* Only cater for unit-at-a-time mode, where we know that the user
3267 cannot later specify a section for DECL. */
3268 if (!flag_unit_at_a_time)
3269 return false;
3271 /* Make sure that we will not create a unique section for DECL. */
3272 if (flag_function_sections || DECL_ONE_ONLY (decl))
3273 return false;
3276 return function_section (decl) == section;
3279 /* Return nonzero if a 32-bit "long_call" should be generated for
3280 a call from the current function to DECL. We generate a long_call
3281 if the function:
3283 a. has an __attribute__ ((long_call))
3284 or b. is within the scope of a #pragma long_calls
3285 or c. the -mlong-calls command line switch has been specified
3287 However we do not generate a long call if the function:
3289 d. has an __attribute__ ((short_call))
3290 or e. is inside the scope of a #pragma no_long_calls
3291 or f. is defined in the same section as the current function. */
3293 bool
3294 arm_is_long_call_p (tree decl)
3296 tree attrs;
3298 if (!decl)
3299 return TARGET_LONG_CALLS;
3301 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3302 if (lookup_attribute ("short_call", attrs))
3303 return false;
3305 /* For "f", be conservative, and only cater for cases in which the
3306 whole of the current function is placed in the same section. */
3307 if (!flag_reorder_blocks_and_partition
3308 && arm_function_in_section_p (decl, current_function_section ()))
3309 return false;
3311 if (lookup_attribute ("long_call", attrs))
3312 return true;
3314 return TARGET_LONG_CALLS;
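/* Illustration only (hypothetical user code): the attribute and pragma
   forms that rules (a), (b), (d) and (e) above refer to.  */
#if 0
void far_func (void) __attribute__ ((long_call));   /* (a) always indirect */
void near_func (void) __attribute__ ((short_call)); /* (d) always a plain BL */
#pragma long_calls     /* (b) subsequent declarations default to long calls */
#pragma no_long_calls  /* (e) ... and from here on they do not */
#endif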
3317 /* Return nonzero if it is ok to make a tail-call to DECL. */
3318 static bool
3319 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3321 unsigned long func_type;
3323 if (cfun->machine->sibcall_blocked)
3324 return false;
3326 /* Never tailcall something for which we have no decl, or if we
3327 are in Thumb mode. */
3328 if (decl == NULL || TARGET_THUMB)
3329 return false;
3331 /* The PIC register is live on entry to VxWorks PLT entries, so we
3332 must make the call before restoring the PIC register. */
3333 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3334 return false;
3336 /* Cannot tail-call to long calls, since these are out of range of
3337 a branch instruction. */
3338 if (arm_is_long_call_p (decl))
3339 return false;
3341 /* If we are interworking and the function is not declared static
3342 then we can't tail-call it unless we know that it exists in this
3343 compilation unit (since it might be a Thumb routine). */
3344 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3345 return false;
3347 func_type = arm_current_func_type ();
3348 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3349 if (IS_INTERRUPT (func_type))
3350 return false;
3352 /* Never tailcall if function may be called with a misaligned SP. */
3353 if (IS_STACKALIGN (func_type))
3354 return false;
3356 /* Everything else is ok. */
3357 return true;
3361 /* Addressing mode support functions. */
3363 /* Return nonzero if X is a legitimate immediate operand when compiling
3364 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3366 legitimate_pic_operand_p (rtx x)
3368 if (GET_CODE (x) == SYMBOL_REF
3369 || (GET_CODE (x) == CONST
3370 && GET_CODE (XEXP (x, 0)) == PLUS
3371 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3372 return 0;
3374 return 1;
3377 /* Record that the current function needs a PIC register. Initialize
3378 cfun->machine->pic_reg if we have not already done so. */
3380 static void
3381 require_pic_register (void)
3383 /* A lot of the logic here is made obscure by the fact that this
3384 routine gets called as part of the rtx cost estimation process.
3385 We don't want those calls to affect any assumptions about the real
3386 function; and further, we can't call entry_of_function() until we
3387 start the real expansion process. */
3388 if (!crtl->uses_pic_offset_table)
3390 gcc_assert (can_create_pseudo_p ());
3391 if (arm_pic_register != INVALID_REGNUM)
3393 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3395 /* Play games to avoid marking the function as needing pic
3396 if we are being called as part of the cost-estimation
3397 process. */
3398 if (current_ir_type () != IR_GIMPLE)
3399 crtl->uses_pic_offset_table = 1;
3401 else
3403 rtx seq;
3405 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3407 /* Play games to avoid marking the function as needing pic
3408 if we are being called as part of the cost-estimation
3409 process. */
3410 if (current_ir_type () != IR_GIMPLE)
3412 crtl->uses_pic_offset_table = 1;
3413 start_sequence ();
3415 arm_load_pic_register (0UL);
3417 seq = get_insns ();
3418 end_sequence ();
3419 emit_insn_after (seq, entry_of_function ());
3426 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3428 if (GET_CODE (orig) == SYMBOL_REF
3429 || GET_CODE (orig) == LABEL_REF)
3431 rtx pic_ref, address;
3432 rtx insn;
3433 int subregs = 0;
3435 /* If this function doesn't have a pic register, create one now. */
3436 require_pic_register ();
3438 if (reg == 0)
3440 gcc_assert (can_create_pseudo_p ());
3441 reg = gen_reg_rtx (Pmode);
3443 subregs = 1;
3446 if (subregs)
3447 address = gen_reg_rtx (Pmode);
3448 else
3449 address = reg;
3451 if (TARGET_ARM)
3452 emit_insn (gen_pic_load_addr_arm (address, orig));
3453 else if (TARGET_THUMB2)
3454 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3455 else /* TARGET_THUMB1 */
3456 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3458 /* VxWorks does not impose a fixed gap between segments; the run-time
3459 gap can be different from the object-file gap. We therefore can't
3460 use GOTOFF unless we are absolutely sure that the symbol is in the
3461 same segment as the GOT. Unfortunately, the flexibility of linker
3462 scripts means that we can't be sure of that in general, so assume
3463 that GOTOFF is never valid on VxWorks. */
3464 if ((GET_CODE (orig) == LABEL_REF
3465 || (GET_CODE (orig) == SYMBOL_REF &&
3466 SYMBOL_REF_LOCAL_P (orig)))
3467 && NEED_GOT_RELOC
3468 && !TARGET_VXWORKS_RTP)
3469 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3470 else
3472 pic_ref = gen_const_mem (Pmode,
3473 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3474 address));
3477 insn = emit_move_insn (reg, pic_ref);
3479 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3480 by the loop optimizer. */
3481 set_unique_reg_note (insn, REG_EQUAL, orig);
3483 return reg;
3485 else if (GET_CODE (orig) == CONST)
3487 rtx base, offset;
3489 if (GET_CODE (XEXP (orig, 0)) == PLUS
3490 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3491 return orig;
3493 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3494 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3495 return orig;
3497 if (reg == 0)
3499 gcc_assert (can_create_pseudo_p ());
3500 reg = gen_reg_rtx (Pmode);
3503 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3505 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3506 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3507 base == reg ? 0 : reg);
3509 if (GET_CODE (offset) == CONST_INT)
3511 /* The base register doesn't really matter; we only want to
3512 test the index for the appropriate mode. */
3513 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3515 gcc_assert (can_create_pseudo_p ());
3516 offset = force_reg (Pmode, offset);
3519 if (GET_CODE (offset) == CONST_INT)
3520 return plus_constant (base, INTVAL (offset));
3523 if (GET_MODE_SIZE (mode) > 4
3524 && (GET_MODE_CLASS (mode) == MODE_INT
3525 || TARGET_SOFT_FLOAT))
3527 emit_insn (gen_addsi3 (reg, base, offset));
3528 return reg;
3531 return gen_rtx_PLUS (Pmode, base, offset);
3534 return orig;
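/* Illustration only (hypothetical user code): for a PIC reference to a
   preemptible global such as `counter' below, the SYMBOL_REF is rewritten
   by the code above into a load of the symbol's address from its GOT slot
   at pic_reg + offset; a locally binding symbol or a label instead uses
   the cheaper pic_reg + offset form directly when NEED_GOT_RELOC allows.  */
#if 0
extern int counter;
void bump (void) { counter++; }
#endif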
3538 /* Find a spare register to use during the prolog of a function. */
3540 static int
3541 thumb_find_work_register (unsigned long pushed_regs_mask)
3543 int reg;
3545 /* Check the argument registers first as these are call-used. The
3546 register allocation order means that sometimes r3 might be used
3547 but earlier argument registers might not, so check them all. */
3548 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3549 if (!df_regs_ever_live_p (reg))
3550 return reg;
3552 /* Before going on to check the call-saved registers we can try a couple
3553 more ways of deducing that r3 is available. The first is when we are
3554 pushing anonymous arguments onto the stack and we have less than 4
3555 registers worth of fixed arguments(*). In this case r3 will be part of
3556 the variable argument list and so we can be sure that it will be
3557 pushed right at the start of the function. Hence it will be available
3558 for the rest of the prologue.
3559 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
3560 if (cfun->machine->uses_anonymous_args
3561 && crtl->args.pretend_args_size > 0)
3562 return LAST_ARG_REGNUM;
3564 /* The other case is when we have fixed arguments but less than 4 registers
3565 worth. In this case r3 might be used in the body of the function, but
3566 it is not being used to convey an argument into the function. In theory
3567 we could just check crtl->args.size to see how many bytes are
3568 being passed in argument registers, but it seems that it is unreliable.
3569 Sometimes it will have the value 0 when in fact arguments are being
3570 passed. (See testcase execute/20021111-1.c for an example.) So we
3571 check the args_info.nregs field as well. The problem with this field is
3572 that it makes no allowances for arguments that are passed to the
3573 function but which are not used. Hence we could miss an opportunity
3574 when a function has an unused argument in r3. But it is better to be
3575 safe than to be sorry. */
3576 if (! cfun->machine->uses_anonymous_args
3577 && crtl->args.size >= 0
3578 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3579 && crtl->args.info.nregs < 4)
3580 return LAST_ARG_REGNUM;
3582 /* Otherwise look for a call-saved register that is going to be pushed. */
3583 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3584 if (pushed_regs_mask & (1 << reg))
3585 return reg;
3587 if (TARGET_THUMB2)
3589 /* Thumb-2 can use high regs. */
3590 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3591 if (pushed_regs_mask & (1 << reg))
3592 return reg;
3594 /* Something went wrong - thumb_compute_save_reg_mask()
3595 should have arranged for a suitable register to be pushed. */
3596 gcc_unreachable ();
3599 static GTY(()) int pic_labelno;
3601 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3602 low register. */
3604 void
3605 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3607 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3608 rtx global_offset_table;
3610 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3611 return;
3613 gcc_assert (flag_pic);
3615 pic_reg = cfun->machine->pic_reg;
3616 if (TARGET_VXWORKS_RTP)
3618 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3619 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3620 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3622 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3624 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3625 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3627 else
3629 /* We use an UNSPEC rather than a LABEL_REF because this label
3630 never appears in the code stream. */
3632 labelno = GEN_INT (pic_labelno++);
3633 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3634 l1 = gen_rtx_CONST (VOIDmode, l1);
3636 global_offset_table
3637 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3638 /* On the ARM the PC register contains 'dot + 8' at the time of the
3639 addition; on the Thumb it is 'dot + 4'. */
3640 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3641 if (GOT_PCREL)
3643 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3644 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3646 else
3647 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3649 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3650 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3652 if (TARGET_ARM)
3654 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3655 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3657 else if (TARGET_THUMB2)
3659 /* Thumb-2 only allows very limited access to the PC. Calculate the
3660 address in a temporary register. */
3661 if (arm_pic_register != INVALID_REGNUM)
3663 pic_tmp = gen_rtx_REG (SImode,
3664 thumb_find_work_register (saved_regs));
3666 else
3668 gcc_assert (can_create_pseudo_p ());
3669 pic_tmp = gen_reg_rtx (Pmode);
3672 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3673 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3674 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3676 else /* TARGET_THUMB1 */
3678 if (arm_pic_register != INVALID_REGNUM
3679 && REGNO (pic_reg) > LAST_LO_REGNUM)
3681 /* We will have pushed the pic register, so we should always be
3682 able to find a work register. */
3683 pic_tmp = gen_rtx_REG (SImode,
3684 thumb_find_work_register (saved_regs));
3685 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3686 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3688 else
3689 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3690 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3694 /* Need to emit this whether or not we obey regdecls,
3695 since setjmp/longjmp can cause life info to screw up. */
3696 emit_use (pic_reg);
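/* Illustration only: for the generic (non-VxWorks) ARM-mode path above, the
   emitted sequence is conceptually

	ldr	rPIC, =_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)
     .LPICn:
	add	rPIC, pc, rPIC

   The pc operand reads as `.LPICn + 8' in ARM state, hence the bias of 8
   above (4 in Thumb state, where Thumb-2 additionally has to compute the
   value through a scratch register because of its restricted PC access).  */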
3700 /* Return nonzero if X is valid as an ARM state addressing register. */
3701 static int
3702 arm_address_register_rtx_p (rtx x, int strict_p)
3704 int regno;
3706 if (GET_CODE (x) != REG)
3707 return 0;
3709 regno = REGNO (x);
3711 if (strict_p)
3712 return ARM_REGNO_OK_FOR_BASE_P (regno);
3714 return (regno <= LAST_ARM_REGNUM
3715 || regno >= FIRST_PSEUDO_REGISTER
3716 || regno == FRAME_POINTER_REGNUM
3717 || regno == ARG_POINTER_REGNUM);
3720 /* Return TRUE if this rtx is the difference of a symbol and a label,
3721 and will reduce to a PC-relative relocation in the object file.
3722 Expressions like this can be left alone when generating PIC, rather
3723 than forced through the GOT. */
3724 static int
3725 pcrel_constant_p (rtx x)
3727 if (GET_CODE (x) == MINUS)
3728 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3730 return FALSE;
3733 /* Return nonzero if X is a valid ARM state address operand. */
3735 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3736 int strict_p)
3738 bool use_ldrd;
3739 enum rtx_code code = GET_CODE (x);
3741 if (arm_address_register_rtx_p (x, strict_p))
3742 return 1;
3744 use_ldrd = (TARGET_LDRD
3745 && (mode == DImode
3746 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3748 if (code == POST_INC || code == PRE_DEC
3749 || ((code == PRE_INC || code == POST_DEC)
3750 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3751 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3753 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3754 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3755 && GET_CODE (XEXP (x, 1)) == PLUS
3756 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3758 rtx addend = XEXP (XEXP (x, 1), 1);
3760 /* Don't allow ldrd post-increment by register because it's hard
3761 to fix up invalid register choices. */
3762 if (use_ldrd
3763 && GET_CODE (x) == POST_MODIFY
3764 && GET_CODE (addend) == REG)
3765 return 0;
3767 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3768 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3771 /* After reload constants split into minipools will have addresses
3772 from a LABEL_REF. */
3773 else if (reload_completed
3774 && (code == LABEL_REF
3775 || (code == CONST
3776 && GET_CODE (XEXP (x, 0)) == PLUS
3777 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3778 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3779 return 1;
3781 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3782 return 0;
3784 else if (code == PLUS)
3786 rtx xop0 = XEXP (x, 0);
3787 rtx xop1 = XEXP (x, 1);
3789 return ((arm_address_register_rtx_p (xop0, strict_p)
3790 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3791 || (arm_address_register_rtx_p (xop1, strict_p)
3792 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3795 #if 0
3796 /* Reload currently can't handle MINUS, so disable this for now */
3797 else if (GET_CODE (x) == MINUS)
3799 rtx xop0 = XEXP (x, 0);
3800 rtx xop1 = XEXP (x, 1);
3802 return (arm_address_register_rtx_p (xop0, strict_p)
3803 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3805 #endif
3807 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3808 && code == SYMBOL_REF
3809 && CONSTANT_POOL_ADDRESS_P (x)
3810 && ! (flag_pic
3811 && symbol_mentioned_p (get_pool_constant (x))
3812 && ! pcrel_constant_p (get_pool_constant (x))))
3813 return 1;
3815 return 0;
3818 /* Return nonzero if X is a valid Thumb-2 address operand. */
3820 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3822 bool use_ldrd;
3823 enum rtx_code code = GET_CODE (x);
3825 if (arm_address_register_rtx_p (x, strict_p))
3826 return 1;
3828 use_ldrd = (TARGET_LDRD
3829 && (mode == DImode
3830 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3832 if (code == POST_INC || code == PRE_DEC
3833 || ((code == PRE_INC || code == POST_DEC)
3834 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3835 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3837 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3838 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3839 && GET_CODE (XEXP (x, 1)) == PLUS
3840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3842 /* Thumb-2 only has autoincrement by constant. */
3843 rtx addend = XEXP (XEXP (x, 1), 1);
3844 HOST_WIDE_INT offset;
3846 if (GET_CODE (addend) != CONST_INT)
3847 return 0;
3849 offset = INTVAL(addend);
3850 if (GET_MODE_SIZE (mode) <= 4)
3851 return (offset > -256 && offset < 256);
3853 return (use_ldrd && offset > -1024 && offset < 1024
3854 && (offset & 3) == 0);
3857 /* After reload constants split into minipools will have addresses
3858 from a LABEL_REF. */
3859 else if (reload_completed
3860 && (code == LABEL_REF
3861 || (code == CONST
3862 && GET_CODE (XEXP (x, 0)) == PLUS
3863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3864 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3865 return 1;
3867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3868 return 0;
3870 else if (code == PLUS)
3872 rtx xop0 = XEXP (x, 0);
3873 rtx xop1 = XEXP (x, 1);
3875 return ((arm_address_register_rtx_p (xop0, strict_p)
3876 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3877 || (arm_address_register_rtx_p (xop1, strict_p)
3878 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3881 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3882 && code == SYMBOL_REF
3883 && CONSTANT_POOL_ADDRESS_P (x)
3884 && ! (flag_pic
3885 && symbol_mentioned_p (get_pool_constant (x))
3886 && ! pcrel_constant_p (get_pool_constant (x))))
3887 return 1;
3889 return 0;
3892 /* Return nonzero if INDEX is valid for an address index operand in
3893 ARM state. */
3894 static int
3895 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3896 int strict_p)
3898 HOST_WIDE_INT range;
3899 enum rtx_code code = GET_CODE (index);
3901 /* Standard coprocessor addressing modes. */
3902 if (TARGET_HARD_FLOAT
3903 && (TARGET_FPA || TARGET_MAVERICK)
3904 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3905 || (TARGET_MAVERICK && mode == DImode)))
3906 return (code == CONST_INT && INTVAL (index) < 1024
3907 && INTVAL (index) > -1024
3908 && (INTVAL (index) & 3) == 0);
3910 if (TARGET_NEON
3911 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3912 return (code == CONST_INT
3913 && INTVAL (index) < 1016
3914 && INTVAL (index) > -1024
3915 && (INTVAL (index) & 3) == 0);
3917 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3918 return (code == CONST_INT
3919 && INTVAL (index) < 1024
3920 && INTVAL (index) > -1024
3921 && (INTVAL (index) & 3) == 0);
3923 if (arm_address_register_rtx_p (index, strict_p)
3924 && (GET_MODE_SIZE (mode) <= 4))
3925 return 1;
3927 if (mode == DImode || mode == DFmode)
3929 if (code == CONST_INT)
3931 HOST_WIDE_INT val = INTVAL (index);
3933 if (TARGET_LDRD)
3934 return val > -256 && val < 256;
3935 else
3936 return val > -4096 && val < 4092;
3939 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3942 if (GET_MODE_SIZE (mode) <= 4
3943 && ! (arm_arch4
3944 && (mode == HImode
3945 || (mode == QImode && outer == SIGN_EXTEND))))
3947 if (code == MULT)
3949 rtx xiop0 = XEXP (index, 0);
3950 rtx xiop1 = XEXP (index, 1);
3952 return ((arm_address_register_rtx_p (xiop0, strict_p)
3953 && power_of_two_operand (xiop1, SImode))
3954 || (arm_address_register_rtx_p (xiop1, strict_p)
3955 && power_of_two_operand (xiop0, SImode)));
3957 else if (code == LSHIFTRT || code == ASHIFTRT
3958 || code == ASHIFT || code == ROTATERT)
3960 rtx op = XEXP (index, 1);
3962 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3963 && GET_CODE (op) == CONST_INT
3964 && INTVAL (op) > 0
3965 && INTVAL (op) <= 31);
3969 /* For ARM v4 we may be doing a sign-extend operation during the
3970 load. */
3971 if (arm_arch4)
3973 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3974 range = 256;
3975 else
3976 range = 4096;
3978 else
3979 range = (mode == HImode) ? 4095 : 4096;
3981 return (code == CONST_INT
3982 && INTVAL (index) < range
3983 && INTVAL (index) > -range);
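/* Editorial worked example, not part of the original arm.c: with arm_arch4,
   an SImode index therefore accepts literal offsets in (-4096, 4096), i.e.
   ldr/str offsets of -4095 .. 4095, while HImode (and sign-extended QImode)
   accesses are limited to -255 .. 255, matching the smaller immediate field
   of ldrh/ldrsh/ldrsb.  */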
3986 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
3987 index operand, i.e. 1, 2, 4 or 8. */
3988 static bool
3989 thumb2_index_mul_operand (rtx op)
3991 HOST_WIDE_INT val;
3993 if (GET_CODE (op) != CONST_INT)
3994 return false;
3996 val = INTVAL (op);
3997 return (val == 1 || val == 2 || val == 4 || val == 8);
4000 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
4001 static int
4002 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
4004 enum rtx_code code = GET_CODE (index);
4006 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
4007 /* Standard coprocessor addressing modes. */
4008 if (TARGET_HARD_FLOAT
4009 && (TARGET_FPA || TARGET_MAVERICK)
4010 && (GET_MODE_CLASS (mode) == MODE_FLOAT
4011 || (TARGET_MAVERICK && mode == DImode)))
4012 return (code == CONST_INT && INTVAL (index) < 1024
4013 && INTVAL (index) > -1024
4014 && (INTVAL (index) & 3) == 0);
4016 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4018 /* For DImode assume values will usually live in core regs
4019 and only allow LDRD addressing modes. */
4020 if (!TARGET_LDRD || mode != DImode)
4021 return (code == CONST_INT
4022 && INTVAL (index) < 1024
4023 && INTVAL (index) > -1024
4024 && (INTVAL (index) & 3) == 0);
4027 if (TARGET_NEON
4028 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4029 return (code == CONST_INT
4030 && INTVAL (index) < 1016
4031 && INTVAL (index) > -1024
4032 && (INTVAL (index) & 3) == 0);
4034 if (arm_address_register_rtx_p (index, strict_p)
4035 && (GET_MODE_SIZE (mode) <= 4))
4036 return 1;
4038 if (mode == DImode || mode == DFmode)
4040 HOST_WIDE_INT val;
4041 /* ??? Can we assume ldrd for thumb2? */
4042 /* Thumb-2 ldrd only has reg+const addressing modes, so reject anything
4043 else before reading INTVAL. */
4044 if (code != CONST_INT)
4045 return 0;
4046 val = INTVAL (index);
4047 /* ldrd supports offsets of +-1020.
4048 However, the ldr fallback does not. */
4049 return val > -256 && val < 256 && (val & 3) == 0;
4051 if (code == MULT)
4053 rtx xiop0 = XEXP (index, 0);
4054 rtx xiop1 = XEXP (index, 1);
4056 return ((arm_address_register_rtx_p (xiop0, strict_p)
4057 && thumb2_index_mul_operand (xiop1))
4058 || (arm_address_register_rtx_p (xiop1, strict_p)
4059 && thumb2_index_mul_operand (xiop0)));
4061 else if (code == ASHIFT)
4063 rtx op = XEXP (index, 1);
4065 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4066 && GET_CODE (op) == CONST_INT
4067 && INTVAL (op) > 0
4068 && INTVAL (op) <= 3);
4071 return (code == CONST_INT
4072 && INTVAL (index) < 4096
4073 && INTVAL (index) > -256);
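/* Editorial note, not part of the original arm.c: the asymmetric range
   -255 .. 4095 accepted here mirrors the Thumb-2 load/store encodings, which
   provide a 12-bit unsigned immediate for positive offsets (ldr.w) but only
   an 8-bit immediate for negative offsets.  */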
4076 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4077 static int
4078 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4080 int regno;
4082 if (GET_CODE (x) != REG)
4083 return 0;
4085 regno = REGNO (x);
4087 if (strict_p)
4088 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4090 return (regno <= LAST_LO_REGNUM
4091 || regno > LAST_VIRTUAL_REGISTER
4092 || regno == FRAME_POINTER_REGNUM
4093 || (GET_MODE_SIZE (mode) >= 4
4094 && (regno == STACK_POINTER_REGNUM
4095 || regno >= FIRST_PSEUDO_REGISTER
4096 || x == hard_frame_pointer_rtx
4097 || x == arg_pointer_rtx)));
4100 /* Return nonzero if x is a legitimate index register. This is the case
4101 for any base register that can access a QImode object. */
4102 inline static int
4103 thumb1_index_register_rtx_p (rtx x, int strict_p)
4105 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4108 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4110 The AP may be eliminated to either the SP or the FP, so we use the
4111 least common denominator, e.g. SImode, and offsets from 0 to 64.
4113 ??? Verify whether the above is the right approach.
4115 ??? Also, the FP may be eliminated to the SP, so perhaps that
4116 needs special handling also.
4118 ??? Look at how the mips16 port solves this problem. It probably uses
4119 better ways to solve some of these problems.
4121 Although it is not incorrect, we don't accept QImode and HImode
4122 addresses based on the frame pointer or arg pointer until the
4123 reload pass starts. This is so that eliminating such addresses
4124 into stack based ones won't produce impossible code. */
4126 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4128 /* ??? Not clear if this is right. Experiment. */
4129 if (GET_MODE_SIZE (mode) < 4
4130 && !(reload_in_progress || reload_completed)
4131 && (reg_mentioned_p (frame_pointer_rtx, x)
4132 || reg_mentioned_p (arg_pointer_rtx, x)
4133 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4134 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4135 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4136 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4137 return 0;
4139 /* Accept any base register. SP only in SImode or larger. */
4140 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4141 return 1;
4143 /* This is PC relative data before arm_reorg runs. */
4144 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4145 && GET_CODE (x) == SYMBOL_REF
4146 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4147 return 1;
4149 /* This is PC relative data after arm_reorg runs. */
4150 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4151 && (GET_CODE (x) == LABEL_REF
4152 || (GET_CODE (x) == CONST
4153 && GET_CODE (XEXP (x, 0)) == PLUS
4154 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4155 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4156 return 1;
4158 /* Post-inc indexing only supported for SImode and larger. */
4159 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4160 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4161 return 1;
4163 else if (GET_CODE (x) == PLUS)
4165 /* REG+REG address can be any two index registers. */
4166 /* We disallow FRAME+REG addressing since we know that FRAME
4167 will be replaced with STACK, and SP relative addressing only
4168 permits SP+OFFSET. */
4169 if (GET_MODE_SIZE (mode) <= 4
4170 && XEXP (x, 0) != frame_pointer_rtx
4171 && XEXP (x, 1) != frame_pointer_rtx
4172 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4173 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4174 return 1;
4176 /* REG+const has 5-7 bit offset for non-SP registers. */
4177 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4178 || XEXP (x, 0) == arg_pointer_rtx)
4179 && GET_CODE (XEXP (x, 1)) == CONST_INT
4180 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4181 return 1;
4183 /* REG+const has 10-bit offset for SP, but only SImode and
4184 larger is supported. */
4185 /* ??? Should probably check for DI/DFmode overflow here
4186 just like GO_IF_LEGITIMATE_OFFSET does. */
4187 else if (GET_CODE (XEXP (x, 0)) == REG
4188 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4189 && GET_MODE_SIZE (mode) >= 4
4190 && GET_CODE (XEXP (x, 1)) == CONST_INT
4191 && INTVAL (XEXP (x, 1)) >= 0
4192 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4193 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4194 return 1;
4196 else if (GET_CODE (XEXP (x, 0)) == REG
4197 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4198 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4199 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4200 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4201 && GET_MODE_SIZE (mode) >= 4
4202 && GET_CODE (XEXP (x, 1)) == CONST_INT
4203 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4204 return 1;
4207 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4208 && GET_MODE_SIZE (mode) == 4
4209 && GET_CODE (x) == SYMBOL_REF
4210 && CONSTANT_POOL_ADDRESS_P (x)
4211 && ! (flag_pic
4212 && symbol_mentioned_p (get_pool_constant (x))
4213 && ! pcrel_constant_p (get_pool_constant (x))))
4214 return 1;
4216 return 0;
4219 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4220 instruction of mode MODE. */
4222 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4224 switch (GET_MODE_SIZE (mode))
4226 case 1:
4227 return val >= 0 && val < 32;
4229 case 2:
4230 return val >= 0 && val < 64 && (val & 1) == 0;
4232 default:
4233 return (val >= 0
4234 && (val + GET_MODE_SIZE (mode)) <= 128
4235 && (val & 3) == 0);
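/* Editorial worked example, not part of the original arm.c: these ranges are
   the 5-bit scaled immediates of the 16-bit Thumb loads and stores, e.g.
   thumb_legitimate_offset_p (QImode, 31), (HImode, 62) and (SImode, 124) all
   hold, while (SImode, 128) and any negative offset do not.  */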
4239 /* Build the SYMBOL_REF for __tls_get_addr. */
4241 static GTY(()) rtx tls_get_addr_libfunc;
4243 static rtx
4244 get_tls_get_addr (void)
4246 if (!tls_get_addr_libfunc)
4247 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4248 return tls_get_addr_libfunc;
4251 static rtx
4252 arm_load_tp (rtx target)
4254 if (!target)
4255 target = gen_reg_rtx (SImode);
4257 if (TARGET_HARD_TP)
4259 /* Can return in any reg. */
4260 emit_insn (gen_load_tp_hard (target));
4262 else
4264 /* Always returned in r0. Immediately copy the result into a pseudo,
4265 otherwise other uses of r0 (e.g. setting up function arguments) may
4266 clobber the value. */
4268 rtx tmp;
4270 emit_insn (gen_load_tp_soft ());
4272 tmp = gen_rtx_REG (SImode, 0);
4273 emit_move_insn (target, tmp);
4275 return target;
4278 static rtx
4279 load_tls_operand (rtx x, rtx reg)
4281 rtx tmp;
4283 if (reg == NULL_RTX)
4284 reg = gen_reg_rtx (SImode);
4286 tmp = gen_rtx_CONST (SImode, x);
4288 emit_move_insn (reg, tmp);
4290 return reg;
4293 static rtx
4294 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4296 rtx insns, label, labelno, sum;
4298 start_sequence ();
4300 labelno = GEN_INT (pic_labelno++);
4301 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4302 label = gen_rtx_CONST (VOIDmode, label);
4304 sum = gen_rtx_UNSPEC (Pmode,
4305 gen_rtvec (4, x, GEN_INT (reloc), label,
4306 GEN_INT (TARGET_ARM ? 8 : 4)),
4307 UNSPEC_TLS);
4308 reg = load_tls_operand (sum, reg);
4310 if (TARGET_ARM)
4311 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4312 else if (TARGET_THUMB2)
4314 rtx tmp;
4315 /* Thumb-2 only allows very limited access to the PC. Calculate
4316 the address in a temporary register. */
4317 tmp = gen_reg_rtx (SImode);
4318 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4319 emit_insn (gen_addsi3(reg, reg, tmp));
4321 else /* TARGET_THUMB1 */
4322 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4324 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4325 Pmode, 1, reg, Pmode);
4327 insns = get_insns ();
4328 end_sequence ();
4330 return insns;
4334 legitimize_tls_address (rtx x, rtx reg)
4336 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4337 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4339 switch (model)
4341 case TLS_MODEL_GLOBAL_DYNAMIC:
4342 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4343 dest = gen_reg_rtx (Pmode);
4344 emit_libcall_block (insns, dest, ret, x);
4345 return dest;
4347 case TLS_MODEL_LOCAL_DYNAMIC:
4348 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4350 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4351 share the LDM result with other LD model accesses. */
4352 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4353 UNSPEC_TLS);
4354 dest = gen_reg_rtx (Pmode);
4355 emit_libcall_block (insns, dest, ret, eqv);
4357 /* Load the addend. */
4358 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4359 UNSPEC_TLS);
4360 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4361 return gen_rtx_PLUS (Pmode, dest, addend);
4363 case TLS_MODEL_INITIAL_EXEC:
4364 labelno = GEN_INT (pic_labelno++);
4365 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4366 label = gen_rtx_CONST (VOIDmode, label);
4367 sum = gen_rtx_UNSPEC (Pmode,
4368 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4369 GEN_INT (TARGET_ARM ? 8 : 4)),
4370 UNSPEC_TLS);
4371 reg = load_tls_operand (sum, reg);
4373 if (TARGET_ARM)
4374 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4375 else if (TARGET_THUMB2)
4377 rtx tmp;
4378 /* Thumb-2 only allows very limited access to the PC. Calculate
4379 the address in a temporary register. */
4380 tmp = gen_reg_rtx (SImode);
4381 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4382 emit_insn (gen_addsi3(reg, reg, tmp));
4383 emit_move_insn (reg, gen_const_mem (SImode, reg));
4385 else
4387 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4388 emit_move_insn (reg, gen_const_mem (SImode, reg));
4391 tp = arm_load_tp (NULL_RTX);
4393 return gen_rtx_PLUS (Pmode, tp, reg);
4395 case TLS_MODEL_LOCAL_EXEC:
4396 tp = arm_load_tp (NULL_RTX);
4398 reg = gen_rtx_UNSPEC (Pmode,
4399 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4400 UNSPEC_TLS);
4401 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4403 return gen_rtx_PLUS (Pmode, tp, reg);
4405 default:
4406 abort ();
4410 /* Try machine-dependent ways of modifying an illegitimate address
4411 to be legitimate. If we find one, return the new, valid address. */
4413 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4415 if (arm_tls_symbol_p (x))
4416 return legitimize_tls_address (x, NULL_RTX);
4418 if (GET_CODE (x) == PLUS)
4420 rtx xop0 = XEXP (x, 0);
4421 rtx xop1 = XEXP (x, 1);
4423 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4424 xop0 = force_reg (SImode, xop0);
4426 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4427 xop1 = force_reg (SImode, xop1);
4429 if (ARM_BASE_REGISTER_RTX_P (xop0)
4430 && GET_CODE (xop1) == CONST_INT)
4432 HOST_WIDE_INT n, low_n;
4433 rtx base_reg, val;
4434 n = INTVAL (xop1);
4436 /* VFP addressing modes actually allow greater offsets, but for
4437 now we just stick with the lowest common denominator. */
4438 if (mode == DImode
4439 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4441 low_n = n & 0x0f;
4442 n &= ~0x0f;
4443 if (low_n > 4)
4445 n += 16;
4446 low_n -= 16;
4449 else
4451 low_n = ((mode) == TImode ? 0
4452 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4453 n -= low_n;
4456 base_reg = gen_reg_rtx (SImode);
4457 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4458 emit_move_insn (base_reg, val);
4459 x = plus_constant (base_reg, low_n);
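/* Editorial worked example, not part of the original arm.c: for a DImode
   access at base + 1020, low_n starts as 12 and n as 1008; since low_n > 4
   the split becomes n = 1024, low_n = -4, so the biased base register holds
   base + 1024 and the final address is (base + 1024) - 4.  */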
4461 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4462 x = gen_rtx_PLUS (SImode, xop0, xop1);
4465 /* XXX We don't allow MINUS any more -- see comment in
4466 arm_legitimate_address_p (). */
4467 else if (GET_CODE (x) == MINUS)
4469 rtx xop0 = XEXP (x, 0);
4470 rtx xop1 = XEXP (x, 1);
4472 if (CONSTANT_P (xop0))
4473 xop0 = force_reg (SImode, xop0);
4475 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4476 xop1 = force_reg (SImode, xop1);
4478 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4479 x = gen_rtx_MINUS (SImode, xop0, xop1);
4482 /* Make sure to take full advantage of the pre-indexed addressing mode
4483 with absolute addresses which often allows for the base register to
4484 be factorized for multiple adjacent memory references, and it might
4485 even allow for the minipool to be avoided entirely. */
4486 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4488 unsigned int bits;
4489 HOST_WIDE_INT mask, base, index;
4490 rtx base_reg;
4492 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4493 use an 8-bit index. So let's use a 12-bit index for SImode only and
4494 hope that arm_gen_constant will enable ldrb to use more bits. */
4495 bits = (mode == SImode) ? 12 : 8;
4496 mask = (1 << bits) - 1;
4497 base = INTVAL (x) & ~mask;
4498 index = INTVAL (x) & mask;
4499 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4501 /* It'll most probably be more efficient to generate the base
4502 with more bits set and use a negative index instead. */
4503 base |= mask;
4504 index -= mask;
4506 base_reg = force_reg (SImode, GEN_INT (base));
4507 x = plus_constant (base_reg, index);
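/* Editorial worked example, not part of the original arm.c: for an SImode
   access to the absolute address 0x12345678, bits = 12 and mask = 0xfff,
   giving base = 0x12345000 and index = 0x678; since bit_count (0x12345000)
   is 7, which is not above (32 - 12)/2 = 10, the base is kept as is and the
   address becomes base_reg + 0x678.  */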
4510 if (flag_pic)
4512 /* We need to find and carefully transform any SYMBOL and LABEL
4513 references; so go back to the original address expression. */
4514 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4516 if (new_x != orig_x)
4517 x = new_x;
4520 return x;
4524 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4525 to be legitimate. If we find one, return the new, valid address. */
4527 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4529 if (arm_tls_symbol_p (x))
4530 return legitimize_tls_address (x, NULL_RTX);
4532 if (GET_CODE (x) == PLUS
4533 && GET_CODE (XEXP (x, 1)) == CONST_INT
4534 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4535 || INTVAL (XEXP (x, 1)) < 0))
4537 rtx xop0 = XEXP (x, 0);
4538 rtx xop1 = XEXP (x, 1);
4539 HOST_WIDE_INT offset = INTVAL (xop1);
4541 /* Try and fold the offset into a biasing of the base register and
4542 then offsetting that. Don't do this when optimizing for space
4543 since it can cause too many CSEs. */
4544 if (optimize_size && offset >= 0
4545 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4547 HOST_WIDE_INT delta;
4549 if (offset >= 256)
4550 delta = offset - (256 - GET_MODE_SIZE (mode));
4551 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4552 delta = 31 * GET_MODE_SIZE (mode);
4553 else
4554 delta = offset & (~31 * GET_MODE_SIZE (mode));
4556 xop0 = force_operand (plus_constant (xop0, offset - delta),
4557 NULL_RTX);
4558 x = plus_constant (xop0, delta);
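/* Editorial worked example, not part of the original arm.c: when this branch
   is taken for an SImode access with offset = 300, delta = 300 - (256 - 4)
   = 48, so the base register is biased by 252 and the residual offset of 48
   is a legal 5-bit scaled ldr offset.  */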
4560 else if (offset < 0 && offset > -256)
4561 /* Small negative offsets are best done with a subtract before the
4562 dereference; forcing these into a register normally takes two
4563 instructions. */
4564 x = force_operand (x, NULL_RTX);
4565 else
4567 /* For the remaining cases, force the constant into a register. */
4568 xop1 = force_reg (SImode, xop1);
4569 x = gen_rtx_PLUS (SImode, xop0, xop1);
4572 else if (GET_CODE (x) == PLUS
4573 && s_register_operand (XEXP (x, 1), SImode)
4574 && !s_register_operand (XEXP (x, 0), SImode))
4576 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4578 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4581 if (flag_pic)
4583 /* We need to find and carefully transform any SYMBOL and LABEL
4584 references; so go back to the original address expression. */
4585 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4587 if (new_x != orig_x)
4588 x = new_x;
4591 return x;
4595 thumb_legitimize_reload_address (rtx *x_p,
4596 enum machine_mode mode,
4597 int opnum, int type,
4598 int ind_levels ATTRIBUTE_UNUSED)
4600 rtx x = *x_p;
4602 if (GET_CODE (x) == PLUS
4603 && GET_MODE_SIZE (mode) < 4
4604 && REG_P (XEXP (x, 0))
4605 && XEXP (x, 0) == stack_pointer_rtx
4606 && GET_CODE (XEXP (x, 1)) == CONST_INT
4607 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4609 rtx orig_x = x;
4611 x = copy_rtx (x);
4612 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4613 Pmode, VOIDmode, 0, 0, opnum, type);
4614 return x;
4617 /* If both registers are hi-regs, then it's better to reload the
4618 entire expression rather than each register individually. That
4619 only requires one reload register rather than two. */
4620 if (GET_CODE (x) == PLUS
4621 && REG_P (XEXP (x, 0))
4622 && REG_P (XEXP (x, 1))
4623 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4624 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4626 rtx orig_x = x;
4628 x = copy_rtx (x);
4629 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4630 Pmode, VOIDmode, 0, 0, opnum, type);
4631 return x;
4634 return NULL;
4637 /* Test for various thread-local symbols. */
4639 /* Return TRUE if X is a thread-local symbol. */
4641 static bool
4642 arm_tls_symbol_p (rtx x)
4644 if (! TARGET_HAVE_TLS)
4645 return false;
4647 if (GET_CODE (x) != SYMBOL_REF)
4648 return false;
4650 return SYMBOL_REF_TLS_MODEL (x) != 0;
4653 /* Helper for arm_tls_referenced_p. */
4655 static int
4656 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4658 if (GET_CODE (*x) == SYMBOL_REF)
4659 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4661 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4662 TLS offsets, not real symbol references. */
4663 if (GET_CODE (*x) == UNSPEC
4664 && XINT (*x, 1) == UNSPEC_TLS)
4665 return -1;
4667 return 0;
4670 /* Return TRUE if X contains any TLS symbol references. */
4672 bool
4673 arm_tls_referenced_p (rtx x)
4675 if (! TARGET_HAVE_TLS)
4676 return false;
4678 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4681 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4683 bool
4684 arm_cannot_force_const_mem (rtx x)
4686 rtx base, offset;
4688 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4690 split_const (x, &base, &offset);
4691 if (GET_CODE (base) == SYMBOL_REF
4692 && !offset_within_block_p (base, INTVAL (offset)))
4693 return true;
4695 return arm_tls_referenced_p (x);
4698 #define REG_OR_SUBREG_REG(X) \
4699 (GET_CODE (X) == REG \
4700 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4702 #define REG_OR_SUBREG_RTX(X) \
4703 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4705 #ifndef COSTS_N_INSNS
4706 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4707 #endif
4708 static inline int
4709 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4711 enum machine_mode mode = GET_MODE (x);
4713 switch (code)
4715 case ASHIFT:
4716 case ASHIFTRT:
4717 case LSHIFTRT:
4718 case ROTATERT:
4719 case PLUS:
4720 case MINUS:
4721 case COMPARE:
4722 case NEG:
4723 case NOT:
4724 return COSTS_N_INSNS (1);
4726 case MULT:
4727 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4729 int cycles = 0;
4730 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4732 while (i)
4734 i >>= 2;
4735 cycles++;
4737 return COSTS_N_INSNS (2) + cycles;
4739 return COSTS_N_INSNS (1) + 16;
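/* Editorial worked example, not part of the original arm.c: for a
   multiplication by the constant 100, the loop above shifts 100 -> 25 -> 6
   -> 1 -> 0, so cycles = 4 and the cost returned is COSTS_N_INSNS (2) + 4.  */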
4741 case SET:
4742 return (COSTS_N_INSNS (1)
4743 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4744 + (GET_CODE (SET_DEST (x)) == MEM)));
4746 case CONST_INT:
4747 if (outer == SET)
4749 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4750 return 0;
4751 if (thumb_shiftable_const (INTVAL (x)))
4752 return COSTS_N_INSNS (2);
4753 return COSTS_N_INSNS (3);
4755 else if ((outer == PLUS || outer == COMPARE)
4756 && INTVAL (x) < 256 && INTVAL (x) > -256)
4757 return 0;
4758 else if (outer == AND
4759 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4760 return COSTS_N_INSNS (1);
4761 else if (outer == ASHIFT || outer == ASHIFTRT
4762 || outer == LSHIFTRT)
4763 return 0;
4764 return COSTS_N_INSNS (2);
4766 case CONST:
4767 case CONST_DOUBLE:
4768 case LABEL_REF:
4769 case SYMBOL_REF:
4770 return COSTS_N_INSNS (3);
4772 case UDIV:
4773 case UMOD:
4774 case DIV:
4775 case MOD:
4776 return 100;
4778 case TRUNCATE:
4779 return 99;
4781 case AND:
4782 case XOR:
4783 case IOR:
4784 /* XXX guess. */
4785 return 8;
4787 case MEM:
4788 /* XXX another guess. */
4789 /* Memory costs quite a lot for the first word, but subsequent words
4790 load at the equivalent of a single insn each. */
4791 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4792 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4793 ? 4 : 0));
4795 case IF_THEN_ELSE:
4796 /* XXX a guess. */
4797 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4798 return 14;
4799 return 2;
4801 case ZERO_EXTEND:
4802 /* XXX still guessing. */
4803 switch (GET_MODE (XEXP (x, 0)))
4805 case QImode:
4806 return (1 + (mode == DImode ? 4 : 0)
4807 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4809 case HImode:
4810 return (4 + (mode == DImode ? 4 : 0)
4811 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4813 case SImode:
4814 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4816 default:
4817 return 99;
4820 default:
4821 return 99;
4826 /* Worker routine for arm_rtx_costs. */
4827 /* ??? This needs updating for thumb2. */
4828 static inline int
4829 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4831 enum machine_mode mode = GET_MODE (x);
4832 enum rtx_code subcode;
4833 int extra_cost;
4835 switch (code)
4837 case MEM:
4838 /* Memory costs quite a lot for the first word, but subsequent words
4839 load at the equivalent of a single insn each. */
4840 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4841 + (GET_CODE (x) == SYMBOL_REF
4842 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4844 case DIV:
4845 case MOD:
4846 case UDIV:
4847 case UMOD:
4848 return optimize_size ? COSTS_N_INSNS (2) : 100;
4850 case ROTATE:
4851 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4852 return 4;
4853 /* Fall through */
4854 case ROTATERT:
4855 if (mode != SImode)
4856 return 8;
4857 /* Fall through */
4858 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4859 if (mode == DImode)
4860 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4861 + ((GET_CODE (XEXP (x, 0)) == REG
4862 || (GET_CODE (XEXP (x, 0)) == SUBREG
4863 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4864 ? 0 : 8));
4865 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4866 || (GET_CODE (XEXP (x, 0)) == SUBREG
4867 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4868 ? 0 : 4)
4869 + ((GET_CODE (XEXP (x, 1)) == REG
4870 || (GET_CODE (XEXP (x, 1)) == SUBREG
4871 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4872 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4873 ? 0 : 4));
4875 case MINUS:
4876 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4878 extra_cost = rtx_cost (XEXP (x, 1), code);
4879 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4880 extra_cost += 4 * ARM_NUM_REGS (mode);
4881 return extra_cost;
4884 if (mode == DImode)
4885 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4886 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4887 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4888 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4889 ? 0 : 8));
4891 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4892 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4893 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4894 && arm_const_double_rtx (XEXP (x, 1))))
4895 ? 0 : 8)
4896 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4897 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4898 && arm_const_double_rtx (XEXP (x, 0))))
4899 ? 0 : 8));
4901 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4902 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4903 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4904 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4905 || subcode == ASHIFTRT || subcode == LSHIFTRT
4906 || subcode == ROTATE || subcode == ROTATERT
4907 || (subcode == MULT
4908 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4909 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4910 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4911 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4912 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4913 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4914 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4915 return 1;
4916 /* Fall through */
4918 case PLUS:
4919 if (arm_arch6 && mode == SImode
4920 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4921 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4922 return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
4923 + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
4925 if (GET_CODE (XEXP (x, 0)) == MULT)
4927 extra_cost = rtx_cost (XEXP (x, 0), code);
4928 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4929 extra_cost += 4 * ARM_NUM_REGS (mode);
4930 return extra_cost;
4933 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4934 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4935 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4936 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4937 && arm_const_double_rtx (XEXP (x, 1))))
4938 ? 0 : 8));
4940 /* Fall through */
4941 case AND: case XOR: case IOR:
4942 extra_cost = 0;
4944 /* Normally the frame registers will be spilt into reg+const during
4945 reload, so it is a bad idea to combine them with other instructions,
4946 since then they might not be moved outside of loops. As a compromise
4947 we allow integration with ops that have a constant as their second
4948 operand. */
4949 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4950 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4951 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4952 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4953 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4954 extra_cost = 4;
4956 if (mode == DImode)
4957 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4958 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4959 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4960 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4961 ? 0 : 8));
4963 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4964 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4965 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4966 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4967 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4968 ? 0 : 4));
4970 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4971 return (1 + extra_cost
4972 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4973 || subcode == LSHIFTRT || subcode == ASHIFTRT
4974 || subcode == ROTATE || subcode == ROTATERT
4975 || (subcode == MULT
4976 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4977 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4978 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4979 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4980 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4981 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4982 ? 0 : 4));
4984 return 8;
4986 case MULT:
4987 /* This should have been handled by the CPU specific routines. */
4988 gcc_unreachable ();
4990 case TRUNCATE:
4991 if (arm_arch3m && mode == SImode
4992 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4993 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4994 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4995 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4996 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4997 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4998 return 8;
4999 return 99;
5001 case NEG:
5002 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5003 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
5004 /* Fall through */
5005 case NOT:
5006 if (mode == DImode)
5007 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5009 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
5011 case IF_THEN_ELSE:
5012 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5013 return 14;
5014 return 2;
5016 case COMPARE:
5017 return 1;
5019 case ABS:
5020 return 4 + (mode == DImode ? 4 : 0);
5022 case SIGN_EXTEND:
5023 if (arm_arch_thumb2 && mode == SImode)
5024 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5026 if (GET_MODE (XEXP (x, 0)) == QImode)
5027 return (4 + (mode == DImode ? 4 : 0)
5028 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5029 /* Fall through */
5030 case ZERO_EXTEND:
5031 if (arm_arch6 && mode == SImode)
5032 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5034 switch (GET_MODE (XEXP (x, 0)))
5036 case QImode:
5037 return (1 + (mode == DImode ? 4 : 0)
5038 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5040 case HImode:
5041 return (4 + (mode == DImode ? 4 : 0)
5042 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5044 case SImode:
5045 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5047 case V8QImode:
5048 case V4HImode:
5049 case V2SImode:
5050 case V4QImode:
5051 case V2HImode:
5052 return 1;
5054 default:
5055 gcc_unreachable ();
5057 gcc_unreachable ();
5059 case CONST_INT:
5060 if (const_ok_for_arm (INTVAL (x)))
5061 return outer == SET ? 2 : -1;
5062 else if (outer == AND
5063 && const_ok_for_arm (~INTVAL (x)))
5064 return -1;
5065 else if ((outer == COMPARE
5066 || outer == PLUS || outer == MINUS)
5067 && const_ok_for_arm (-INTVAL (x)))
5068 return -1;
5069 else
5070 return 5;
5072 case CONST:
5073 case LABEL_REF:
5074 case SYMBOL_REF:
5075 return 6;
5077 case CONST_DOUBLE:
5078 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5079 return outer == SET ? 2 : -1;
5080 else if ((outer == COMPARE || outer == PLUS)
5081 && neg_const_double_rtx_ok_for_fpa (x))
5082 return -1;
5083 return 7;
5085 default:
5086 return 99;
5090 /* RTX costs when optimizing for size. */
5091 static bool
5092 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5094 enum machine_mode mode = GET_MODE (x);
5096 if (TARGET_THUMB1)
5098 /* XXX TBD. For now, use the standard costs. */
5099 *total = thumb1_rtx_costs (x, code, outer_code);
5100 return true;
5103 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5104 switch (code)
5106 case MEM:
5107 /* A memory access costs 1 insn if the mode is small, or the address is
5108 a single register; otherwise it costs one insn per word. */
5109 if (REG_P (XEXP (x, 0)))
5110 *total = COSTS_N_INSNS (1);
5111 else
5112 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5113 return true;
5115 case DIV:
5116 case MOD:
5117 case UDIV:
5118 case UMOD:
5119 /* Needs a libcall, so it costs about this. */
5120 *total = COSTS_N_INSNS (2);
5121 return false;
5123 case ROTATE:
5124 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5126 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5127 return true;
5129 /* Fall through */
5130 case ROTATERT:
5131 case ASHIFT:
5132 case LSHIFTRT:
5133 case ASHIFTRT:
5134 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5136 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5137 return true;
5139 else if (mode == SImode)
5141 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5142 /* Slightly disparage register shifts, but not by much. */
5143 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5144 *total += 1 + rtx_cost (XEXP (x, 1), code);
5145 return true;
5148 /* Needs a libcall. */
5149 *total = COSTS_N_INSNS (2);
5150 return false;
5152 case MINUS:
5153 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5155 *total = COSTS_N_INSNS (1);
5156 return false;
5159 if (mode == SImode)
5161 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5162 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5164 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5165 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5166 || subcode1 == ROTATE || subcode1 == ROTATERT
5167 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5168 || subcode1 == ASHIFTRT)
5170 /* It's just the cost of the two operands. */
5171 *total = 0;
5172 return false;
5175 *total = COSTS_N_INSNS (1);
5176 return false;
5179 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5180 return false;
5182 case PLUS:
5183 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5185 *total = COSTS_N_INSNS (1);
5186 return false;
5189 /* Fall through */
5190 case AND: case XOR: case IOR:
5191 if (mode == SImode)
5193 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5195 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5196 || subcode == LSHIFTRT || subcode == ASHIFTRT
5197 || (code == AND && subcode == NOT))
5199 /* It's just the cost of the two operands. */
5200 *total = 0;
5201 return false;
5205 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5206 return false;
5208 case MULT:
5209 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5210 return false;
5212 case NEG:
5213 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5214 {
5215 *total = COSTS_N_INSNS (1);
5216 return false;
5217 }
5218 /* Fall through */
5216 case NOT:
5217 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5219 return false;
5221 case IF_THEN_ELSE:
5222 *total = 0;
5223 return false;
5225 case COMPARE:
5226 if (cc_register (XEXP (x, 0), VOIDmode))
5227 *total = 0;
5228 else
5229 *total = COSTS_N_INSNS (1);
5230 return false;
5232 case ABS:
5233 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5234 *total = COSTS_N_INSNS (1);
5235 else
5236 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5237 return false;
5239 case SIGN_EXTEND:
5240 *total = 0;
5241 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5243 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5244 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5246 if (mode == DImode)
5247 *total += COSTS_N_INSNS (1);
5248 return false;
5250 case ZERO_EXTEND:
5251 *total = 0;
5252 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5254 switch (GET_MODE (XEXP (x, 0)))
5256 case QImode:
5257 *total += COSTS_N_INSNS (1);
5258 break;
5260 case HImode:
5261 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5263 case SImode:
5264 break;
5266 default:
5267 *total += COSTS_N_INSNS (2);
5271 if (mode == DImode)
5272 *total += COSTS_N_INSNS (1);
5274 return false;
5276 case CONST_INT:
5277 if (const_ok_for_arm (INTVAL (x)))
5278 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5279 else if (const_ok_for_arm (~INTVAL (x)))
5280 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5281 else if (const_ok_for_arm (-INTVAL (x)))
5283 if (outer_code == COMPARE || outer_code == PLUS
5284 || outer_code == MINUS)
5285 *total = 0;
5286 else
5287 *total = COSTS_N_INSNS (1);
5289 else
5290 *total = COSTS_N_INSNS (2);
5291 return true;
5293 case CONST:
5294 case LABEL_REF:
5295 case SYMBOL_REF:
5296 *total = COSTS_N_INSNS (2);
5297 return true;
5299 case CONST_DOUBLE:
5300 *total = COSTS_N_INSNS (4);
5301 return true;
5303 default:
5304 if (mode != VOIDmode)
5305 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5306 else
5307 *total = COSTS_N_INSNS (4); /* Who knows? */
5308 return false;
5312 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5313 supported on any "slowmul" cores, so it can be ignored. */
5315 static bool
5316 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5318 enum machine_mode mode = GET_MODE (x);
5320 if (TARGET_THUMB)
5322 *total = thumb1_rtx_costs (x, code, outer_code);
5323 return true;
5326 switch (code)
5328 case MULT:
5329 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5330 || mode == DImode)
5332 *total = 30;
5333 return true;
5336 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5338 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5339 & (unsigned HOST_WIDE_INT) 0xffffffff);
5340 int cost, const_ok = const_ok_for_arm (i);
5341 int j, booth_unit_size;
5343 /* Tune as appropriate. */
5344 cost = const_ok ? 4 : 8;
5345 booth_unit_size = 2;
5346 for (j = 0; i && j < 32; j += booth_unit_size)
5348 i >>= booth_unit_size;
5349 cost += 2;
5352 *total = cost;
5353 return true;
5356 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5357 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5358 return true;
5360 default:
5361 *total = arm_rtx_costs_1 (x, code, outer_code);
5362 return true;
5367 /* RTX cost for cores with a fast multiply unit (M variants). */
5369 static bool
5370 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5372 enum machine_mode mode = GET_MODE (x);
5374 if (TARGET_THUMB1)
5376 *total = thumb1_rtx_costs (x, code, outer_code);
5377 return true;
5381 /* ??? Should Thumb-2 use different costs? */
5381 switch (code)
5383 case MULT:
5384 /* There is no point basing this on the tuning, since it is always the
5385 fast variant if it exists at all. */
5386 if (mode == DImode
5387 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5388 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5389 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5391 *total = 8;
5392 return true;
5396 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5397 || mode == DImode)
5399 *total = 30;
5400 return true;
5403 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5405 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5406 & (unsigned HOST_WIDE_INT) 0xffffffff);
5407 int cost, const_ok = const_ok_for_arm (i);
5408 int j, booth_unit_size;
5410 /* Tune as appropriate. */
5411 cost = const_ok ? 4 : 8;
5412 booth_unit_size = 8;
5413 for (j = 0; i && j < 32; j += booth_unit_size)
5415 i >>= booth_unit_size;
5416 cost += 2;
5419 *total = cost;
5420 return true;
5423 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5424 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5425 return true;
5427 default:
5428 *total = arm_rtx_costs_1 (x, code, outer_code);
5429 return true;
5434 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5435 so it can be ignored. */
5437 static bool
5438 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5440 enum machine_mode mode = GET_MODE (x);
5442 if (TARGET_THUMB)
5444 *total = thumb1_rtx_costs (x, code, outer_code);
5445 return true;
5448 switch (code)
5450 case MULT:
5451 /* There is no point basing this on the tuning, since it is always the
5452 fast variant if it exists at all. */
5453 if (mode == DImode
5454 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5455 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5456 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5458 *total = 8;
5459 return true;
5463 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5464 || mode == DImode)
5466 *total = 30;
5467 return true;
5470 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5472 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5473 & (unsigned HOST_WIDE_INT) 0xffffffff);
5474 int cost, const_ok = const_ok_for_arm (i);
5475 unsigned HOST_WIDE_INT masked_const;
5477 /* The cost will be related to two insns.
5478 First a load of the constant (MOV or LDR), then a multiply. */
5479 cost = 2;
5480 if (! const_ok)
5481 cost += 1; /* LDR is probably more expensive because
5482 of longer result latency. */
5483 masked_const = i & 0xffff8000;
5484 if (masked_const != 0 && masked_const != 0xffff8000)
5486 masked_const = i & 0xf8000000;
5487 if (masked_const == 0 || masked_const == 0xf8000000)
5488 cost += 1;
5489 else
5490 cost += 2;
5492 *total = cost;
5493 return true;
5496 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5497 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5498 return true;
5500 case COMPARE:
5501 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5502 will stall until the multiplication is complete. */
5503 if (GET_CODE (XEXP (x, 0)) == MULT)
5504 *total = 4 + rtx_cost (XEXP (x, 0), code);
5505 else
5506 *total = arm_rtx_costs_1 (x, code, outer_code);
5507 return true;
5509 default:
5510 *total = arm_rtx_costs_1 (x, code, outer_code);
5511 return true;
5516 /* RTX costs for 9e (and later) cores. */
5518 static bool
5519 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5521 enum machine_mode mode = GET_MODE (x);
5522 int nonreg_cost;
5523 int cost;
5525 if (TARGET_THUMB1)
5527 switch (code)
5529 case MULT:
5530 *total = COSTS_N_INSNS (3);
5531 return true;
5533 default:
5534 *total = thumb1_rtx_costs (x, code, outer_code);
5535 return true;
5539 switch (code)
5541 case MULT:
5542 /* There is no point basing this on the tuning, since it is always the
5543 fast variant if it exists at all. */
5544 if (mode == DImode
5545 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5546 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5547 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5549 *total = 3;
5550 return true;
5554 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5556 *total = 30;
5557 return true;
5559 if (mode == DImode)
5561 cost = 7;
5562 nonreg_cost = 8;
5564 else
5566 cost = 2;
5567 nonreg_cost = 4;
5571 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5572 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5573 return true;
5575 default:
5576 *total = arm_rtx_costs_1 (x, code, outer_code);
5577 return true;
5580 /* All address computations that can be done are free, but rtx cost returns
5581 the same for practically all of them. So we weight the different types
5582 of address here in the order (most pref first):
5583 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5584 static inline int
5585 arm_arm_address_cost (rtx x)
5587 enum rtx_code c = GET_CODE (x);
5589 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5590 return 0;
5591 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5592 return 10;
5594 if (c == PLUS || c == MINUS)
5596 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5597 return 2;
5599 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5600 return 3;
5602 return 4;
5605 return 6;
5608 static inline int
5609 arm_thumb_address_cost (rtx x)
5611 enum rtx_code c = GET_CODE (x);
5613 if (c == REG)
5614 return 1;
5615 if (c == PLUS
5616 && GET_CODE (XEXP (x, 0)) == REG
5617 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5618 return 1;
5620 return 2;
5623 static int
5624 arm_address_cost (rtx x)
5626 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5629 static int
5630 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5632 rtx i_pat, d_pat;
5634 /* Some true dependencies can have a higher cost depending
5635 on precisely how certain input operands are used. */
5636 if (arm_tune_xscale
5637 && REG_NOTE_KIND (link) == 0
5638 && recog_memoized (insn) >= 0
5639 && recog_memoized (dep) >= 0)
5641 int shift_opnum = get_attr_shift (insn);
5642 enum attr_type attr_type = get_attr_type (dep);
5644 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5645 operand for INSN. If we have a shifted input operand and the
5646 instruction we depend on is another ALU instruction, then we may
5647 have to account for an additional stall. */
5648 if (shift_opnum != 0
5649 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5651 rtx shifted_operand;
5652 int opno;
5654 /* Get the shifted operand. */
5655 extract_insn (insn);
5656 shifted_operand = recog_data.operand[shift_opnum];
5658 /* Iterate over all the operands in DEP. If we write an operand
5659 that overlaps with SHIFTED_OPERAND, then we have to increase the
5660 cost of this dependency. */
5661 extract_insn (dep);
5662 preprocess_constraints ();
5663 for (opno = 0; opno < recog_data.n_operands; opno++)
5665 /* We can ignore strict inputs. */
5666 if (recog_data.operand_type[opno] == OP_IN)
5667 continue;
5669 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5670 shifted_operand))
5671 return 2;
5676 /* XXX This is not strictly true for the FPA. */
5677 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5678 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5679 return 0;
5681 /* Call insns don't incur a stall, even if they follow a load. */
5682 if (REG_NOTE_KIND (link) == 0
5683 && GET_CODE (insn) == CALL_INSN)
5684 return 1;
5686 if ((i_pat = single_set (insn)) != NULL
5687 && GET_CODE (SET_SRC (i_pat)) == MEM
5688 && (d_pat = single_set (dep)) != NULL
5689 && GET_CODE (SET_DEST (d_pat)) == MEM)
5691 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5692 /* This is a load after a store; there is no conflict if the load reads
5693 from a cached area. Assume that loads from the stack, and from the
5694 constant pool are cached, and that others will miss. This is a
5695 hack. */
5697 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5698 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5699 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5700 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5701 return 1;
5704 return cost;
5707 static int fp_consts_inited = 0;
5709 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5710 static const char * const strings_fp[8] =
5712 "0", "1", "2", "3",
5713 "4", "5", "0.5", "10"
5716 static REAL_VALUE_TYPE values_fp[8];
5718 static void
5719 init_fp_table (void)
5721 int i;
5722 REAL_VALUE_TYPE r;
5724 if (TARGET_VFP)
5725 fp_consts_inited = 1;
5726 else
5727 fp_consts_inited = 8;
5729 for (i = 0; i < fp_consts_inited; i++)
5731 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5732 values_fp[i] = r;
5736 /* Return TRUE if rtx X is a valid immediate FP constant. */
5738 arm_const_double_rtx (rtx x)
5740 REAL_VALUE_TYPE r;
5741 int i;
5743 if (!fp_consts_inited)
5744 init_fp_table ();
5746 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5747 if (REAL_VALUE_MINUS_ZERO (r))
5748 return 0;
5750 for (i = 0; i < fp_consts_inited; i++)
5751 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5752 return 1;
5754 return 0;
5757 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5759 neg_const_double_rtx_ok_for_fpa (rtx x)
5761 REAL_VALUE_TYPE r;
5762 int i;
5764 if (!fp_consts_inited)
5765 init_fp_table ();
5767 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5768 r = REAL_VALUE_NEGATE (r);
5769 if (REAL_VALUE_MINUS_ZERO (r))
5770 return 0;
5772 for (i = 0; i < 8; i++)
5773 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5774 return 1;
5776 return 0;
5780 /* VFPv3 has a fairly wide range of representable immediates, formed from
5781 "quarter-precision" floating-point values. These can be evaluated using this
5782 formula (with ^ for exponentiation):
5784 -1^s * n * 2^-r
5786 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5787 16 <= n <= 31 and 0 <= r <= 7.
5789 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5791 - A (most-significant) is the sign bit.
5792 - BCD are the exponent (encoded as r XOR 3).
5793 - EFGH are the mantissa (encoded as n - 16).
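/* Editorial sketch, not part of the original arm.c: the encoding described
   above can be computed directly from s, n and r; this mirrors the return
   expression of vfp3_const_double_index below.  For example 1.0 = 16 * 2^-4
   gives s = 0, n = 16, r = 4 and hence
   (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, the imm8 operand used by
   "vmov.f64 d0, #1.0".  */
#if 0	/* Illustration only; the helper name is hypothetical.  */
static int
vfp3_quarter_precision_immediate (int s, int n, int r)
{
  /* Assumes 0 <= s <= 1, 16 <= n <= 31 and 0 <= r <= 7, as stated above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}
#endif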
5796 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5797 fconst[sd] instruction, or -1 if X isn't suitable. */
5798 static int
5799 vfp3_const_double_index (rtx x)
5801 REAL_VALUE_TYPE r, m;
5802 int sign, exponent;
5803 unsigned HOST_WIDE_INT mantissa, mant_hi;
5804 unsigned HOST_WIDE_INT mask;
5805 HOST_WIDE_INT m1, m2;
5806 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5808 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5809 return -1;
5811 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5813 /* We can't represent these things, so detect them first. */
5814 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5815 return -1;
5817 /* Extract sign, exponent and mantissa. */
5818 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5819 r = REAL_VALUE_ABS (r);
5820 exponent = REAL_EXP (&r);
5821 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5822 highest (sign) bit, with a fixed binary point at bit point_pos.
5823 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5824 bits for the mantissa, this may fail (low bits would be lost). */
5825 real_ldexp (&m, &r, point_pos - exponent);
5826 REAL_VALUE_TO_INT (&m1, &m2, m);
5827 mantissa = m1;
5828 mant_hi = m2;
5830 /* If there are bits set in the low part of the mantissa, we can't
5831 represent this value. */
5832 if (mantissa != 0)
5833 return -1;
5835 /* Now make it so that mantissa contains the most-significant bits, and move
5836 the point_pos to indicate that the least-significant bits have been
5837 discarded. */
5838 point_pos -= HOST_BITS_PER_WIDE_INT;
5839 mantissa = mant_hi;
5841 /* We can permit four significant bits of mantissa only, plus a high bit
5842 which is always 1. */
5843 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5844 if ((mantissa & mask) != 0)
5845 return -1;
5847 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5848 mantissa >>= point_pos - 5;
5850 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5851 floating-point immediate zero with Neon using an integer-zero load, but
5852 that case is handled elsewhere.) */
5853 if (mantissa == 0)
5854 return -1;
5856 gcc_assert (mantissa >= 16 && mantissa <= 31);
5858 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5859 normalized significands are in the range [1, 2). (Our mantissa is shifted
5860 left 4 places at this point relative to normalized IEEE754 values). GCC
5861 internally uses [0.5, 1) (see real.c), so the exponent returned from
5862 REAL_EXP must be altered. */
5863 exponent = 5 - exponent;
5865 if (exponent < 0 || exponent > 7)
5866 return -1;
5868 /* Sign, mantissa and exponent are now in the correct form to plug into the
5869 formula described in the comment above. */
5870 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5873 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5875 vfp3_const_double_rtx (rtx x)
5877 if (!TARGET_VFP3)
5878 return 0;
5880 return vfp3_const_double_index (x) != -1;
5883 /* Recognize immediates which can be used in various Neon instructions. Legal
5884 immediates are described by the following table (for VMVN variants, the
5885 bitwise inverse of the constant shown is recognized. In either case, VMOV
5886 is output and the correct instruction to use for a given constant is chosen
5887 by the assembler). The constant shown is replicated across all elements of
5888 the destination vector.
5890 insn elems variant constant (binary)
5891 ---- ----- ------- -----------------
5892 vmov i32 0 00000000 00000000 00000000 abcdefgh
5893 vmov i32 1 00000000 00000000 abcdefgh 00000000
5894 vmov i32 2 00000000 abcdefgh 00000000 00000000
5895 vmov i32 3 abcdefgh 00000000 00000000 00000000
5896 vmov i16 4 00000000 abcdefgh
5897 vmov i16 5 abcdefgh 00000000
5898 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5899 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5900 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5901 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5902 vmvn i16 10 00000000 abcdefgh
5903 vmvn i16 11 abcdefgh 00000000
5904 vmov i32 12 00000000 00000000 abcdefgh 11111111
5905 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5906 vmov i32 14 00000000 abcdefgh 11111111 11111111
5907 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5908 vmov i8 16 abcdefgh
5909 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5910 eeeeeeee ffffffff gggggggg hhhhhhhh
5911 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5913 For case 18, B = !b. Representable values are exactly those accepted by
5914 vfp3_const_double_index, but are output as floating-point numbers rather
5915 than indices.
5917 Variants 0-5 (inclusive) may also be used as immediates for the second
5918 operand of VORR/VBIC instructions.
5920 The INVERSE argument causes the bitwise inverse of the given operand to be
5921 recognized instead (used for recognizing legal immediates for the VAND/VORN
5922 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5923 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5924 output, rather than the real insns vbic/vorr).
5926 INVERSE makes no difference to the recognition of float vectors.
5928 The return value is the variant of immediate as shown in the above table, or
5929 -1 if the given value doesn't match any of the listed patterns.
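/* Worked example (illustrative): a V4SImode CONST_VECTOR whose elements
   are all 0x0000ab00 splats to the bytes 00 ab 00 00 in every 32-bit
   lane, which is variant 1 above; neon_valid_immediate returns 1 with
   *ELEMENTWIDTH = 32 and *MODCONST = GEN_INT (0xab00). A vector whose
   elements are all 0xffffff55 instead matches variant 6, one of the
   VMVN forms.  */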
5931 static int
5932 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5933 rtx *modconst, int *elementwidth)
5935 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5936 matches = 1; \
5937 for (i = 0; i < idx; i += (STRIDE)) \
5938 if (!(TEST)) \
5939 matches = 0; \
5940 if (matches) \
5942 immtype = (CLASS); \
5943 elsize = (ELSIZE); \
5944 break; \
5947 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5948 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5949 unsigned char bytes[16];
5950 int immtype = -1, matches;
5951 unsigned int invmask = inverse ? 0xff : 0;
5953 /* Vectors of float constants. */
5954 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5956 rtx el0 = CONST_VECTOR_ELT (op, 0);
5957 REAL_VALUE_TYPE r0;
5959 if (!vfp3_const_double_rtx (el0))
5960 return -1;
5962 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5964 for (i = 1; i < n_elts; i++)
5966 rtx elt = CONST_VECTOR_ELT (op, i);
5967 REAL_VALUE_TYPE re;
5969 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5971 if (!REAL_VALUES_EQUAL (r0, re))
5972 return -1;
5975 if (modconst)
5976 *modconst = CONST_VECTOR_ELT (op, 0);
5978 if (elementwidth)
5979 *elementwidth = 0;
5981 return 18;
5984 /* Splat vector constant out into a byte vector. */
5985 for (i = 0; i < n_elts; i++)
5987 rtx el = CONST_VECTOR_ELT (op, i);
5988 unsigned HOST_WIDE_INT elpart;
5989 unsigned int part, parts;
5991 if (GET_CODE (el) == CONST_INT)
5993 elpart = INTVAL (el);
5994 parts = 1;
5996 else if (GET_CODE (el) == CONST_DOUBLE)
5998 elpart = CONST_DOUBLE_LOW (el);
5999 parts = 2;
6001 else
6002 gcc_unreachable ();
6004 for (part = 0; part < parts; part++)
6006 unsigned int byte;
6007 for (byte = 0; byte < innersize; byte++)
6009 bytes[idx++] = (elpart & 0xff) ^ invmask;
6010 elpart >>= BITS_PER_UNIT;
6012 if (GET_CODE (el) == CONST_DOUBLE)
6013 elpart = CONST_DOUBLE_HIGH (el);
6017 /* Sanity check. */
6018 gcc_assert (idx == GET_MODE_SIZE (mode));
6022 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6023 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6025 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6026 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6028 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6029 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6031 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6032 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6034 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6036 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6038 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6039 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6041 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6042 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6044 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6045 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6047 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6048 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6050 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6052 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6054 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6055 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6057 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6058 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6060 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6061 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6063 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6064 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6066 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6068 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6069 && bytes[i] == bytes[(i + 8) % idx]);
6071 while (0);
6073 if (immtype == -1)
6074 return -1;
6076 if (elementwidth)
6077 *elementwidth = elsize;
6079 if (modconst)
6081 unsigned HOST_WIDE_INT imm = 0;
6083 /* Un-invert bytes of recognized vector, if necessary. */
6084 if (invmask != 0)
6085 for (i = 0; i < idx; i++)
6086 bytes[i] ^= invmask;
6088 if (immtype == 17)
6090 /* FIXME: Broken on 32-bit H_W_I hosts. */
6091 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6093 for (i = 0; i < 8; i++)
6094 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6095 << (i * BITS_PER_UNIT);
6097 *modconst = GEN_INT (imm);
6099 else
6101 unsigned HOST_WIDE_INT imm = 0;
6103 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6104 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6106 *modconst = GEN_INT (imm);
6110 return immtype;
6111 #undef CHECK
6114 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6115 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6116 float elements), and a modified constant (whatever should be output for a
6117 VMOV) in *MODCONST. */
6120 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6121 rtx *modconst, int *elementwidth)
6123 rtx tmpconst;
6124 int tmpwidth;
6125 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6127 if (retval == -1)
6128 return 0;
6130 if (modconst)
6131 *modconst = tmpconst;
6133 if (elementwidth)
6134 *elementwidth = tmpwidth;
6136 return 1;
6139 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6140 the immediate is valid, write a constant suitable for using as an operand
6141 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6142 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6145 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6146 rtx *modconst, int *elementwidth)
6148 rtx tmpconst;
6149 int tmpwidth;
6150 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6152 if (retval < 0 || retval > 5)
6153 return 0;
6155 if (modconst)
6156 *modconst = tmpconst;
6158 if (elementwidth)
6159 *elementwidth = tmpwidth;
6161 return 1;
6164 /* Return a string suitable for output of Neon immediate logic operation
6165 MNEM. */
6167 char *
6168 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6169 int inverse, int quad)
6171 int width, is_valid;
6172 static char templ[40];
6174 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6176 gcc_assert (is_valid != 0);
6178 if (quad)
6179 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6180 else
6181 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6183 return templ;
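/* For example (illustrative), with MNEM "vorr", a 32-bit element width
   and QUAD nonzero, the template built above is "vorr.i32\t%q0, %2";
   the destination register and the immediate operand are substituted by
   the normal operand-printing machinery.  */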
6186 /* Output a sequence of pairwise operations to implement a reduction.
6187 NOTE: We do "too much work" here, because pairwise operations work on two
6188 registers-worth of operands in one go. Unfortunately we can't exploit those
6189 extra calculations to do the full operation in fewer steps, I don't think.
6190 Although all vector elements of the result but the first are ignored, we
6191 actually calculate the same result in each of the elements. An alternative
6192 such as initially loading a vector with zero to use as each of the second
6193 operands would use up an additional register and take an extra instruction,
6194 for no particular gain. */
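/* Illustrative sketch: for a four-element vector {a, b, c, d} and a
   REDUC that emits a pairwise add, the loop below performs two steps:
     step 1:  {a, b, c, d}          ->  {a+b, c+d, a+b, c+d}
     step 2:  {a+b, c+d, a+b, c+d}  ->  {a+b+c+d, ...}
   so element 0 of the final destination holds the full reduction (and,
   as noted above, the other elements hold the same value).  */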
6196 void
6197 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6198 rtx (*reduc) (rtx, rtx, rtx))
6200 enum machine_mode inner = GET_MODE_INNER (mode);
6201 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6202 rtx tmpsum = op1;
6204 for (i = parts / 2; i >= 1; i /= 2)
6206 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6207 emit_insn (reduc (dest, tmpsum, tmpsum));
6208 tmpsum = dest;
6212 /* Initialize a vector with non-constant elements. FIXME: We can do better
6213 than the current implementation (building a vector on the stack and then
6214 loading it) in many cases. See rs6000.c. */
6216 void
6217 neon_expand_vector_init (rtx target, rtx vals)
6219 enum machine_mode mode = GET_MODE (target);
6220 enum machine_mode inner = GET_MODE_INNER (mode);
6221 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6222 rtx mem;
6224 gcc_assert (VECTOR_MODE_P (mode));
6226 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6227 for (i = 0; i < n_elts; i++)
6228 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6229 XVECEXP (vals, 0, i));
6231 emit_move_insn (target, mem);
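/* E.g. (illustrative): initializing a V4SImode TARGET from four pseudos
   stores each 32-bit element into consecutive slots of a 16-byte stack
   temporary and then moves the whole temporary into TARGET with a single
   vector move; hence the FIXME above, since element-wise insertion could
   often avoid the round trip through memory.  */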
6234 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6235 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6236 reported source locations are bogus. */
6238 static void
6239 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6240 const char *err)
6242 HOST_WIDE_INT lane;
6244 gcc_assert (GET_CODE (operand) == CONST_INT);
6246 lane = INTVAL (operand);
6248 if (lane < low || lane >= high)
6249 error (err);
6252 /* Bounds-check lanes. */
6254 void
6255 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6257 bounds_check (operand, low, high, "lane out of range");
6260 /* Bounds-check constants. */
6262 void
6263 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6265 bounds_check (operand, low, high, "constant out of range");
6268 HOST_WIDE_INT
6269 neon_element_bits (enum machine_mode mode)
6271 if (mode == DImode)
6272 return GET_MODE_BITSIZE (mode);
6273 else
6274 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6278 /* Predicates for `match_operand' and `match_operator'. */
6280 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6282 cirrus_memory_offset (rtx op)
6284 /* Reject eliminable registers. */
6285 if (! (reload_in_progress || reload_completed)
6286 && ( reg_mentioned_p (frame_pointer_rtx, op)
6287 || reg_mentioned_p (arg_pointer_rtx, op)
6288 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6289 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6290 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6291 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6292 return 0;
6294 if (GET_CODE (op) == MEM)
6296 rtx ind;
6298 ind = XEXP (op, 0);
6300 /* Match: (mem (reg)). */
6301 if (GET_CODE (ind) == REG)
6302 return 1;
6304 /* Match:
6305 (mem (plus (reg)
6306 (const))). */
6307 if (GET_CODE (ind) == PLUS
6308 && GET_CODE (XEXP (ind, 0)) == REG
6309 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6310 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6311 return 1;
6314 return 0;
6317 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6318 WB is true if full writeback address modes are allowed and is false
6319 if limited writeback address modes (POST_INC and PRE_DEC) are
6320 allowed. */
6323 arm_coproc_mem_operand (rtx op, bool wb)
6325 rtx ind;
6327 /* Reject eliminable registers. */
6328 if (! (reload_in_progress || reload_completed)
6329 && ( reg_mentioned_p (frame_pointer_rtx, op)
6330 || reg_mentioned_p (arg_pointer_rtx, op)
6331 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6332 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6333 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6334 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6335 return FALSE;
6337 /* Constants are converted into offsets from labels. */
6338 if (GET_CODE (op) != MEM)
6339 return FALSE;
6341 ind = XEXP (op, 0);
6343 if (reload_completed
6344 && (GET_CODE (ind) == LABEL_REF
6345 || (GET_CODE (ind) == CONST
6346 && GET_CODE (XEXP (ind, 0)) == PLUS
6347 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6348 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6349 return TRUE;
6351 /* Match: (mem (reg)). */
6352 if (GET_CODE (ind) == REG)
6353 return arm_address_register_rtx_p (ind, 0);
6355 Autoincrement addressing modes. POST_INC and PRE_DEC are
6356 acceptable in any case (subject to verification by
6357 arm_address_register_rtx_p). We need WB to be true to accept
6358 PRE_INC and POST_DEC. */
6359 if (GET_CODE (ind) == POST_INC
6360 || GET_CODE (ind) == PRE_DEC
6361 || (wb
6362 && (GET_CODE (ind) == PRE_INC
6363 || GET_CODE (ind) == POST_DEC)))
6364 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6366 if (wb
6367 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6368 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6369 && GET_CODE (XEXP (ind, 1)) == PLUS
6370 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6371 ind = XEXP (ind, 1);
6373 /* Match:
6374 (plus (reg)
6375 (const)). */
6376 if (GET_CODE (ind) == PLUS
6377 && GET_CODE (XEXP (ind, 0)) == REG
6378 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6379 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6380 && INTVAL (XEXP (ind, 1)) > -1024
6381 && INTVAL (XEXP (ind, 1)) < 1024
6382 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6383 return TRUE;
6385 return FALSE;
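/* Informal summary of the predicate above (illustrative): with WB false
   it accepts forms such as [rN] and [rN, #off] for off a multiple of four
   in (-1024, 1024), plus post-increment and pre-decrement; with WB true
   it additionally accepts pre-increment, post-decrement and the
   PRE_MODIFY/POST_MODIFY forms, as well as label-based addresses once
   reload has completed.  */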
6388 /* Return TRUE if OP is a memory operand which we can load or store a vector
6389 to/from. If CORE is true, we're moving from ARM registers, not Neon
6390 registers. */
6392 neon_vector_mem_operand (rtx op, bool core)
6394 rtx ind;
6396 /* Reject eliminable registers. */
6397 if (! (reload_in_progress || reload_completed)
6398 && ( reg_mentioned_p (frame_pointer_rtx, op)
6399 || reg_mentioned_p (arg_pointer_rtx, op)
6400 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6401 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6402 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6403 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6404 return FALSE;
6406 /* Constants are converted into offsets from labels. */
6407 if (GET_CODE (op) != MEM)
6408 return FALSE;
6410 ind = XEXP (op, 0);
6412 if (reload_completed
6413 && (GET_CODE (ind) == LABEL_REF
6414 || (GET_CODE (ind) == CONST
6415 && GET_CODE (XEXP (ind, 0)) == PLUS
6416 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6417 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6418 return TRUE;
6420 /* Match: (mem (reg)). */
6421 if (GET_CODE (ind) == REG)
6422 return arm_address_register_rtx_p (ind, 0);
6424 /* Allow post-increment with Neon registers. */
6425 if (!core && GET_CODE (ind) == POST_INC)
6426 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6428 #if 0
6429 /* FIXME: We can support this too if we use VLD1/VST1. */
6430 if (!core
6431 && GET_CODE (ind) == POST_MODIFY
6432 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6433 && GET_CODE (XEXP (ind, 1)) == PLUS
6434 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6435 ind = XEXP (ind, 1);
6436 #endif
6438 /* Match:
6439 (plus (reg)
6440 (const)). */
6441 if (!core
6442 && GET_CODE (ind) == PLUS
6443 && GET_CODE (XEXP (ind, 0)) == REG
6444 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6445 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6446 && INTVAL (XEXP (ind, 1)) > -1024
6447 && INTVAL (XEXP (ind, 1)) < 1016
6448 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6449 return TRUE;
6451 return FALSE;
6454 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6455 type. */
6457 neon_struct_mem_operand (rtx op)
6459 rtx ind;
6461 /* Reject eliminable registers. */
6462 if (! (reload_in_progress || reload_completed)
6463 && ( reg_mentioned_p (frame_pointer_rtx, op)
6464 || reg_mentioned_p (arg_pointer_rtx, op)
6465 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6466 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6467 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6468 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6469 return FALSE;
6471 /* Constants are converted into offsets from labels. */
6472 if (GET_CODE (op) != MEM)
6473 return FALSE;
6475 ind = XEXP (op, 0);
6477 if (reload_completed
6478 && (GET_CODE (ind) == LABEL_REF
6479 || (GET_CODE (ind) == CONST
6480 && GET_CODE (XEXP (ind, 0)) == PLUS
6481 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6482 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6483 return TRUE;
6485 /* Match: (mem (reg)). */
6486 if (GET_CODE (ind) == REG)
6487 return arm_address_register_rtx_p (ind, 0);
6489 return FALSE;
6492 /* Return true if X is a register that will be eliminated later on. */
6494 arm_eliminable_register (rtx x)
6496 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6497 || REGNO (x) == ARG_POINTER_REGNUM
6498 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6499 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6502 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
6503 coprocessor registers. Otherwise return NO_REGS. */
6505 enum reg_class
6506 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6508 if (TARGET_NEON
6509 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6510 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6511 && neon_vector_mem_operand (x, FALSE))
6512 return NO_REGS;
6514 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6515 return NO_REGS;
6517 return GENERAL_REGS;
6520 /* Values which must be returned in the most-significant end of the return
6521 register. */
6523 static bool
6524 arm_return_in_msb (const_tree valtype)
6526 return (TARGET_AAPCS_BASED
6527 && BYTES_BIG_ENDIAN
6528 && (AGGREGATE_TYPE_P (valtype)
6529 || TREE_CODE (valtype) == COMPLEX_TYPE));
6532 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6533 Used by the Cirrus Maverick code, which has to work around
6534 a hardware bug triggered by such instructions. */
6535 static bool
6536 arm_memory_load_p (rtx insn)
6538 rtx body, lhs, rhs;
6540 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6541 return false;
6543 body = PATTERN (insn);
6545 if (GET_CODE (body) != SET)
6546 return false;
6548 lhs = XEXP (body, 0);
6549 rhs = XEXP (body, 1);
6551 lhs = REG_OR_SUBREG_RTX (lhs);
6553 /* If the destination is not a general purpose
6554 register we do not have to worry. */
6555 if (GET_CODE (lhs) != REG
6556 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6557 return false;
6559 /* As well as loads from memory we also have to react
6560 to loads of invalid constants which will be turned
6561 into loads from the minipool. */
6562 return (GET_CODE (rhs) == MEM
6563 || GET_CODE (rhs) == SYMBOL_REF
6564 || note_invalid_constants (insn, -1, false));
6567 /* Return TRUE if INSN is a Cirrus instruction. */
6568 static bool
6569 arm_cirrus_insn_p (rtx insn)
6571 enum attr_cirrus attr;
6573 /* get_attr cannot accept USE or CLOBBER. */
6574 if (!insn
6575 || GET_CODE (insn) != INSN
6576 || GET_CODE (PATTERN (insn)) == USE
6577 || GET_CODE (PATTERN (insn)) == CLOBBER)
6578 return 0;
6580 attr = get_attr_cirrus (insn);
6582 return attr != CIRRUS_NOT;
6585 /* Cirrus reorg for invalid instruction combinations. */
6586 static void
6587 cirrus_reorg (rtx first)
6589 enum attr_cirrus attr;
6590 rtx body = PATTERN (first);
6591 rtx t;
6592 int nops;
6594 /* Any branch must be followed by 2 non Cirrus instructions. */
6595 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6597 nops = 0;
6598 t = next_nonnote_insn (first);
6600 if (arm_cirrus_insn_p (t))
6601 ++ nops;
6603 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6604 ++ nops;
6606 while (nops --)
6607 emit_insn_after (gen_nop (), first);
6609 return;
6612 /* (float (blah)) is in parallel with a clobber. */
6613 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6614 body = XVECEXP (body, 0, 0);
6616 if (GET_CODE (body) == SET)
6618 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6620 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6621 be followed by a non Cirrus insn. */
6622 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6624 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6625 emit_insn_after (gen_nop (), first);
6627 return;
6629 else if (arm_memory_load_p (first))
6631 unsigned int arm_regno;
6633 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6634 ldr/cfmv64hr combination where the Rd field is the same
6635 in both instructions must be split with a non Cirrus
6636 insn. Example:
6638 ldr r0, blah
6640 cfmvsr mvf0, r0. */
6642 /* Get Arm register number for ldr insn. */
6643 if (GET_CODE (lhs) == REG)
6644 arm_regno = REGNO (lhs);
6645 else
6647 gcc_assert (GET_CODE (rhs) == REG);
6648 arm_regno = REGNO (rhs);
6651 /* Next insn. */
6652 first = next_nonnote_insn (first);
6654 if (! arm_cirrus_insn_p (first))
6655 return;
6657 body = PATTERN (first);
6659 /* (float (blah)) is in parallel with a clobber. */
6660 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6661 body = XVECEXP (body, 0, 0);
6663 if (GET_CODE (body) == FLOAT)
6664 body = XEXP (body, 0);
6666 if (get_attr_cirrus (first) == CIRRUS_MOVE
6667 && GET_CODE (XEXP (body, 1)) == REG
6668 && arm_regno == REGNO (XEXP (body, 1)))
6669 emit_insn_after (gen_nop (), first);
6671 return;
6675 /* get_attr cannot accept USE or CLOBBER. */
6676 if (!first
6677 || GET_CODE (first) != INSN
6678 || GET_CODE (PATTERN (first)) == USE
6679 || GET_CODE (PATTERN (first)) == CLOBBER)
6680 return;
6682 attr = get_attr_cirrus (first);
6684 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6685 must be followed by a non-coprocessor instruction. */
6686 if (attr == CIRRUS_COMPARE)
6688 nops = 0;
6690 t = next_nonnote_insn (first);
6692 if (arm_cirrus_insn_p (t))
6693 ++ nops;
6695 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6696 ++ nops;
6698 while (nops --)
6699 emit_insn_after (gen_nop (), first);
6701 return;
6705 /* Return TRUE if X references a SYMBOL_REF. */
6707 symbol_mentioned_p (rtx x)
6709 const char * fmt;
6710 int i;
6712 if (GET_CODE (x) == SYMBOL_REF)
6713 return 1;
6715 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6716 are constant offsets, not symbols. */
6717 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6718 return 0;
6720 fmt = GET_RTX_FORMAT (GET_CODE (x));
6722 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6724 if (fmt[i] == 'E')
6726 int j;
6728 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6729 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6730 return 1;
6732 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6733 return 1;
6736 return 0;
6739 /* Return TRUE if X references a LABEL_REF. */
6741 label_mentioned_p (rtx x)
6743 const char * fmt;
6744 int i;
6746 if (GET_CODE (x) == LABEL_REF)
6747 return 1;
6749 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6750 instruction, but they are constant offsets, not symbols. */
6751 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6752 return 0;
6754 fmt = GET_RTX_FORMAT (GET_CODE (x));
6755 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6757 if (fmt[i] == 'E')
6759 int j;
6761 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6762 if (label_mentioned_p (XVECEXP (x, i, j)))
6763 return 1;
6765 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6766 return 1;
6769 return 0;
6773 tls_mentioned_p (rtx x)
6775 switch (GET_CODE (x))
6777 case CONST:
6778 return tls_mentioned_p (XEXP (x, 0));
6780 case UNSPEC:
6781 if (XINT (x, 1) == UNSPEC_TLS)
6782 return 1;
6784 default:
6785 return 0;
6789 /* Must not copy a SET whose source operand is PC-relative. */
6791 static bool
6792 arm_cannot_copy_insn_p (rtx insn)
6794 rtx pat = PATTERN (insn);
6796 if (GET_CODE (pat) == SET)
6798 rtx rhs = SET_SRC (pat);
6800 if (GET_CODE (rhs) == UNSPEC
6801 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6802 return TRUE;
6804 if (GET_CODE (rhs) == MEM
6805 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6806 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6807 return TRUE;
6810 return FALSE;
6813 enum rtx_code
6814 minmax_code (rtx x)
6816 enum rtx_code code = GET_CODE (x);
6818 switch (code)
6820 case SMAX:
6821 return GE;
6822 case SMIN:
6823 return LE;
6824 case UMIN:
6825 return LEU;
6826 case UMAX:
6827 return GEU;
6828 default:
6829 gcc_unreachable ();
6833 /* Return 1 if memory locations are adjacent. */
6835 adjacent_mem_locations (rtx a, rtx b)
6837 /* We don't guarantee to preserve the order of these memory refs. */
6838 if (volatile_refs_p (a) || volatile_refs_p (b))
6839 return 0;
6841 if ((GET_CODE (XEXP (a, 0)) == REG
6842 || (GET_CODE (XEXP (a, 0)) == PLUS
6843 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6844 && (GET_CODE (XEXP (b, 0)) == REG
6845 || (GET_CODE (XEXP (b, 0)) == PLUS
6846 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6848 HOST_WIDE_INT val0 = 0, val1 = 0;
6849 rtx reg0, reg1;
6850 int val_diff;
6852 if (GET_CODE (XEXP (a, 0)) == PLUS)
6854 reg0 = XEXP (XEXP (a, 0), 0);
6855 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6857 else
6858 reg0 = XEXP (a, 0);
6860 if (GET_CODE (XEXP (b, 0)) == PLUS)
6862 reg1 = XEXP (XEXP (b, 0), 0);
6863 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6865 else
6866 reg1 = XEXP (b, 0);
6868 /* Don't accept any offset that will require multiple
6869 instructions to handle, since this would cause the
6870 arith_adjacentmem pattern to output an overlong sequence. */
6871 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6872 return 0;
6874 /* Don't allow an eliminable register: register elimination can make
6875 the offset too large. */
6876 if (arm_eliminable_register (reg0))
6877 return 0;
6879 val_diff = val1 - val0;
6881 if (arm_ld_sched)
6883 /* If the target has load delay slots, then there's no benefit
6884 to using an ldm instruction unless the offset is zero and
6885 we are optimizing for size. */
6886 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6887 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6888 && (val_diff == 4 || val_diff == -4));
6891 return ((REGNO (reg0) == REGNO (reg1))
6892 && (val_diff == 4 || val_diff == -4));
6895 return 0;
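/* For example (illustrative): MEMs at [r4] and [r4, #4] are adjacent and
   ascending, so on a core without load delay slots the pair is a
   candidate for the arith_adjacentmem pattern; [r4] and [r4, #8] are
   not, since the offsets differ by more than one word.  */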
6899 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6900 HOST_WIDE_INT *load_offset)
6902 int unsorted_regs[4];
6903 HOST_WIDE_INT unsorted_offsets[4];
6904 int order[4];
6905 int base_reg = -1;
6906 int i;
6908 /* Can only handle 2, 3, or 4 insns at present,
6909 though could be easily extended if required. */
6910 gcc_assert (nops >= 2 && nops <= 4);
6912 /* Loop over the operands and check that the memory references are
6913 suitable (i.e. immediate offsets from the same base register). At
6914 the same time, extract the target register, and the memory
6915 offsets. */
6916 for (i = 0; i < nops; i++)
6918 rtx reg;
6919 rtx offset;
6921 /* Convert a subreg of a mem into the mem itself. */
6922 if (GET_CODE (operands[nops + i]) == SUBREG)
6923 operands[nops + i] = alter_subreg (operands + (nops + i));
6925 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6927 /* Don't reorder volatile memory references; it doesn't seem worth
6928 looking for the case where the order is ok anyway. */
6929 if (MEM_VOLATILE_P (operands[nops + i]))
6930 return 0;
6932 offset = const0_rtx;
6934 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6935 || (GET_CODE (reg) == SUBREG
6936 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6937 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6938 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6939 == REG)
6940 || (GET_CODE (reg) == SUBREG
6941 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6942 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6943 == CONST_INT)))
6945 if (i == 0)
6947 base_reg = REGNO (reg);
6948 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6949 ? REGNO (operands[i])
6950 : REGNO (SUBREG_REG (operands[i])));
6951 order[0] = 0;
6953 else
6955 if (base_reg != (int) REGNO (reg))
6956 /* Not addressed from the same base register. */
6957 return 0;
6959 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6960 ? REGNO (operands[i])
6961 : REGNO (SUBREG_REG (operands[i])));
6962 if (unsorted_regs[i] < unsorted_regs[order[0]])
6963 order[0] = i;
6966 /* If it isn't an integer register, or if it overwrites the
6967 base register but isn't the last insn in the list, then
6968 we can't do this. */
6969 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6970 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6971 return 0;
6973 unsorted_offsets[i] = INTVAL (offset);
6975 else
6976 /* Not a suitable memory address. */
6977 return 0;
6980 /* All the useful information has now been extracted from the
6981 operands into unsorted_regs and unsorted_offsets; additionally,
6982 order[0] has been set to the lowest numbered register in the
6983 list. Sort the registers into order, and check that the memory
6984 offsets are ascending and adjacent. */
6986 for (i = 1; i < nops; i++)
6988 int j;
6990 order[i] = order[i - 1];
6991 for (j = 0; j < nops; j++)
6992 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6993 && (order[i] == order[i - 1]
6994 || unsorted_regs[j] < unsorted_regs[order[i]]))
6995 order[i] = j;
6997 /* Have we found a suitable register? If not, one must be used more
6998 than once. */
6999 if (order[i] == order[i - 1])
7000 return 0;
7002 /* Is the memory address adjacent and ascending? */
7003 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7004 return 0;
7007 if (base)
7009 *base = base_reg;
7011 for (i = 0; i < nops; i++)
7012 regs[i] = unsorted_regs[order[i]];
7014 *load_offset = unsorted_offsets[order[0]];
7017 if (unsorted_offsets[order[0]] == 0)
7018 return 1; /* ldmia */
7020 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7021 return 2; /* ldmib */
7023 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7024 return 3; /* ldmda */
7026 if (unsorted_offsets[order[nops - 1]] == -4)
7027 return 4; /* ldmdb */
7029 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7030 if the offset isn't small enough. The reason 2 ldrs are faster
7031 is because these ARMs are able to do more than one cache access
7032 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7033 whilst the ARM8 has a double bandwidth cache. This means that
7034 these cores can do both an instruction fetch and a data fetch in
7035 a single cycle, so the trick of calculating the address into a
7036 scratch register (one of the result regs) and then doing a load
7037 multiple actually becomes slower (and no smaller in code size).
7038 That is the transformation
7040 ldr rd1, [rbase + offset]
7041 ldr rd2, [rbase + offset + 4]
7045 add rd1, rbase, offset
7046 ldmia rd1, {rd1, rd2}
7048 produces worse code -- '3 cycles + any stalls on rd2' instead of
7049 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7050 access per cycle, the first sequence could never complete in less
7051 than 6 cycles, whereas the ldm sequence would only take 5 and
7052 would make better use of sequential accesses if not hitting the
7053 cache.
7055 We cheat here and test 'arm_ld_sched' which we currently know to
7056 only be true for the ARM8, ARM9 and StrongARM. If this ever
7057 changes, then the test below needs to be reworked. */
7058 if (nops == 2 && arm_ld_sched)
7059 return 0;
7061 /* Can't do it without setting up the offset, only do this if it takes
7062 no more than one insn. */
7063 return (const_ok_for_arm (unsorted_offsets[order[0]])
7064 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
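/* Worked example (illustrative): for operands loading r0..r3 from
   [r4], [r4, #4], [r4, #8] and [r4, #12], the registers are already in
   ascending order and the lowest offset is zero, so the function returns
   1 (ldmia) with REGS = {0, 1, 2, 3}, *BASE = 4 and *LOAD_OFFSET = 0.  */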
7067 const char *
7068 emit_ldm_seq (rtx *operands, int nops)
7070 int regs[4];
7071 int base_reg;
7072 HOST_WIDE_INT offset;
7073 char buf[100];
7074 int i;
7076 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7078 case 1:
7079 strcpy (buf, "ldm%(ia%)\t");
7080 break;
7082 case 2:
7083 strcpy (buf, "ldm%(ib%)\t");
7084 break;
7086 case 3:
7087 strcpy (buf, "ldm%(da%)\t");
7088 break;
7090 case 4:
7091 strcpy (buf, "ldm%(db%)\t");
7092 break;
7094 case 5:
7095 if (offset >= 0)
7096 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7097 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7098 (long) offset);
7099 else
7100 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7101 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7102 (long) -offset);
7103 output_asm_insn (buf, operands);
7104 base_reg = regs[0];
7105 strcpy (buf, "ldm%(ia%)\t");
7106 break;
7108 default:
7109 gcc_unreachable ();
7112 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7113 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7115 for (i = 1; i < nops; i++)
7116 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7117 reg_names[regs[i]]);
7119 strcat (buf, "}\t%@ phole ldm");
7121 output_asm_insn (buf, operands);
7122 return "";
7126 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7127 HOST_WIDE_INT * load_offset)
7129 int unsorted_regs[4];
7130 HOST_WIDE_INT unsorted_offsets[4];
7131 int order[4];
7132 int base_reg = -1;
7133 int i;
7135 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7136 extended if required. */
7137 gcc_assert (nops >= 2 && nops <= 4);
7139 /* Loop over the operands and check that the memory references are
7140 suitable (i.e. immediate offsets from the same base register). At
7141 the same time, extract the target register, and the memory
7142 offsets. */
7143 for (i = 0; i < nops; i++)
7145 rtx reg;
7146 rtx offset;
7148 /* Convert a subreg of a mem into the mem itself. */
7149 if (GET_CODE (operands[nops + i]) == SUBREG)
7150 operands[nops + i] = alter_subreg (operands + (nops + i));
7152 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7154 /* Don't reorder volatile memory references; it doesn't seem worth
7155 looking for the case where the order is ok anyway. */
7156 if (MEM_VOLATILE_P (operands[nops + i]))
7157 return 0;
7159 offset = const0_rtx;
7161 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7162 || (GET_CODE (reg) == SUBREG
7163 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7164 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7165 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7166 == REG)
7167 || (GET_CODE (reg) == SUBREG
7168 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7169 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7170 == CONST_INT)))
7172 if (i == 0)
7174 base_reg = REGNO (reg);
7175 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7176 ? REGNO (operands[i])
7177 : REGNO (SUBREG_REG (operands[i])));
7178 order[0] = 0;
7180 else
7182 if (base_reg != (int) REGNO (reg))
7183 /* Not addressed from the same base register. */
7184 return 0;
7186 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7187 ? REGNO (operands[i])
7188 : REGNO (SUBREG_REG (operands[i])));
7189 if (unsorted_regs[i] < unsorted_regs[order[0]])
7190 order[0] = i;
7193 /* If it isn't an integer register, then we can't do this. */
7194 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7195 return 0;
7197 unsorted_offsets[i] = INTVAL (offset);
7199 else
7200 /* Not a suitable memory address. */
7201 return 0;
7204 /* All the useful information has now been extracted from the
7205 operands into unsorted_regs and unsorted_offsets; additionally,
7206 order[0] has been set to the lowest numbered register in the
7207 list. Sort the registers into order, and check that the memory
7208 offsets are ascending and adjacent. */
7210 for (i = 1; i < nops; i++)
7212 int j;
7214 order[i] = order[i - 1];
7215 for (j = 0; j < nops; j++)
7216 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7217 && (order[i] == order[i - 1]
7218 || unsorted_regs[j] < unsorted_regs[order[i]]))
7219 order[i] = j;
7221 /* Have we found a suitable register? If not, one must be used more
7222 than once. */
7223 if (order[i] == order[i - 1])
7224 return 0;
7226 /* Is the memory address adjacent and ascending? */
7227 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7228 return 0;
7231 if (base)
7233 *base = base_reg;
7235 for (i = 0; i < nops; i++)
7236 regs[i] = unsorted_regs[order[i]];
7238 *load_offset = unsorted_offsets[order[0]];
7241 if (unsorted_offsets[order[0]] == 0)
7242 return 1; /* stmia */
7244 if (unsorted_offsets[order[0]] == 4)
7245 return 2; /* stmib */
7247 if (unsorted_offsets[order[nops - 1]] == 0)
7248 return 3; /* stmda */
7250 if (unsorted_offsets[order[nops - 1]] == -4)
7251 return 4; /* stmdb */
7253 return 0;
7256 const char *
7257 emit_stm_seq (rtx *operands, int nops)
7259 int regs[4];
7260 int base_reg;
7261 HOST_WIDE_INT offset;
7262 char buf[100];
7263 int i;
7265 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7267 case 1:
7268 strcpy (buf, "stm%(ia%)\t");
7269 break;
7271 case 2:
7272 strcpy (buf, "stm%(ib%)\t");
7273 break;
7275 case 3:
7276 strcpy (buf, "stm%(da%)\t");
7277 break;
7279 case 4:
7280 strcpy (buf, "stm%(db%)\t");
7281 break;
7283 default:
7284 gcc_unreachable ();
7287 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7288 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7290 for (i = 1; i < nops; i++)
7291 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7292 reg_names[regs[i]]);
7294 strcat (buf, "}\t%@ phole stm");
7296 output_asm_insn (buf, operands);
7297 return "";
7300 /* Routines for use in generating RTL. */
7303 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7304 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7306 HOST_WIDE_INT offset = *offsetp;
7307 int i = 0, j;
7308 rtx result;
7309 int sign = up ? 1 : -1;
7310 rtx mem, addr;
7312 /* XScale has load-store double instructions, but they have stricter
7313 alignment requirements than load-store multiple, so we cannot
7314 use them.
7316 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7317 the pipeline until completion.
7319 NREGS CYCLES
7320 1 3
7321 2 4
7322 3 5
7323 4 6
7325 An ldr instruction takes 1-3 cycles, but does not block the
7326 pipeline.
7328 NREGS CYCLES
7329 1 1-3
7330 2 2-6
7331 3 3-9
7332 4 4-12
7334 Best case ldr will always win. However, the more ldr instructions
7335 we issue, the less likely we are to be able to schedule them well.
7336 Using ldr instructions also increases code size.
7338 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7339 for counts of 3 or 4 regs. */
7340 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7342 rtx seq;
7344 start_sequence ();
7346 for (i = 0; i < count; i++)
7348 addr = plus_constant (from, i * 4 * sign);
7349 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7350 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7351 offset += 4 * sign;
7354 if (write_back)
7356 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7357 *offsetp = offset;
7360 seq = get_insns ();
7361 end_sequence ();
7363 return seq;
7366 result = gen_rtx_PARALLEL (VOIDmode,
7367 rtvec_alloc (count + (write_back ? 1 : 0)));
7368 if (write_back)
7370 XVECEXP (result, 0, 0)
7371 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7372 i = 1;
7373 count++;
7376 for (j = 0; i < count; i++, j++)
7378 addr = plus_constant (from, j * 4 * sign);
7379 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7380 XVECEXP (result, 0, i)
7381 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7382 offset += 4 * sign;
7385 if (write_back)
7386 *offsetp = offset;
7388 return result;
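/* A sketch of the RTL built above for COUNT = 3, BASE_REGNO = 0, UP true
   and no write-back (illustrative):

     (parallel [(set (reg:SI 0) (mem:SI FROM))
                (set (reg:SI 1) (mem:SI (plus FROM (const_int 4))))
                (set (reg:SI 2) (mem:SI (plus FROM (const_int 8))))])

   With WRITE_BACK, an extra (set FROM (plus FROM (const_int 12))) is
   placed first in the parallel.  */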
7392 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7393 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7395 HOST_WIDE_INT offset = *offsetp;
7396 int i = 0, j;
7397 rtx result;
7398 int sign = up ? 1 : -1;
7399 rtx mem, addr;
7401 /* See arm_gen_load_multiple for discussion of
7402 the pros/cons of ldm/stm usage for XScale. */
7403 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7405 rtx seq;
7407 start_sequence ();
7409 for (i = 0; i < count; i++)
7411 addr = plus_constant (to, i * 4 * sign);
7412 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7413 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7414 offset += 4 * sign;
7417 if (write_back)
7419 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7420 *offsetp = offset;
7423 seq = get_insns ();
7424 end_sequence ();
7426 return seq;
7429 result = gen_rtx_PARALLEL (VOIDmode,
7430 rtvec_alloc (count + (write_back ? 1 : 0)));
7431 if (write_back)
7433 XVECEXP (result, 0, 0)
7434 = gen_rtx_SET (VOIDmode, to,
7435 plus_constant (to, count * 4 * sign));
7436 i = 1;
7437 count++;
7440 for (j = 0; i < count; i++, j++)
7442 addr = plus_constant (to, j * 4 * sign);
7443 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7444 XVECEXP (result, 0, i)
7445 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7446 offset += 4 * sign;
7449 if (write_back)
7450 *offsetp = offset;
7452 return result;
7456 arm_gen_movmemqi (rtx *operands)
7458 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7459 HOST_WIDE_INT srcoffset, dstoffset;
7460 int i;
7461 rtx src, dst, srcbase, dstbase;
7462 rtx part_bytes_reg = NULL;
7463 rtx mem;
7465 if (GET_CODE (operands[2]) != CONST_INT
7466 || GET_CODE (operands[3]) != CONST_INT
7467 || INTVAL (operands[2]) > 64
7468 || INTVAL (operands[3]) & 3)
7469 return 0;
7471 dstbase = operands[0];
7472 srcbase = operands[1];
7474 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7475 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7477 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7478 out_words_to_go = INTVAL (operands[2]) / 4;
7479 last_bytes = INTVAL (operands[2]) & 3;
7480 dstoffset = srcoffset = 0;
7482 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7483 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7485 for (i = 0; in_words_to_go >= 2; i+=4)
7487 if (in_words_to_go > 4)
7488 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7489 srcbase, &srcoffset));
7490 else
7491 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7492 FALSE, srcbase, &srcoffset));
7494 if (out_words_to_go)
7496 if (out_words_to_go > 4)
7497 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7498 dstbase, &dstoffset));
7499 else if (out_words_to_go != 1)
7500 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7501 dst, TRUE,
7502 (last_bytes == 0
7503 ? FALSE : TRUE),
7504 dstbase, &dstoffset));
7505 else
7507 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7508 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7509 if (last_bytes != 0)
7511 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7512 dstoffset += 4;
7517 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7518 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7521 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7522 if (out_words_to_go)
7524 rtx sreg;
7526 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7527 sreg = copy_to_reg (mem);
7529 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7530 emit_move_insn (mem, sreg);
7531 in_words_to_go--;
7533 gcc_assert (!in_words_to_go); /* Sanity check */
7536 if (in_words_to_go)
7538 gcc_assert (in_words_to_go > 0);
7540 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7541 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7544 gcc_assert (!last_bytes || part_bytes_reg);
7546 if (BYTES_BIG_ENDIAN && last_bytes)
7548 rtx tmp = gen_reg_rtx (SImode);
7550 /* The bytes we want are in the top end of the word. */
7551 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7552 GEN_INT (8 * (4 - last_bytes))));
7553 part_bytes_reg = tmp;
7555 while (last_bytes)
7557 mem = adjust_automodify_address (dstbase, QImode,
7558 plus_constant (dst, last_bytes - 1),
7559 dstoffset + last_bytes - 1);
7560 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7562 if (--last_bytes)
7564 tmp = gen_reg_rtx (SImode);
7565 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7566 part_bytes_reg = tmp;
7571 else
7573 if (last_bytes > 1)
7575 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7576 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7577 last_bytes -= 2;
7578 if (last_bytes)
7580 rtx tmp = gen_reg_rtx (SImode);
7581 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7582 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7583 part_bytes_reg = tmp;
7584 dstoffset += 2;
7588 if (last_bytes)
7590 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7591 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7595 return 1;
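/* Worked example (illustrative): a word-aligned 14-byte copy
   (operands[2] == 14) gives in_words_to_go = 4, out_words_to_go = 3 and
   last_bytes = 2, so the expansion above is roughly an ldmia of four
   words, an stmia of three words with write-back, and a halfword store
   of the low half of the remaining word (on big-endian targets the
   leftover bytes are shifted down and stored individually instead).  */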
7598 /* Select a dominance comparison mode if possible for a test of the general
7599 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7600 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7601 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7602 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7603 In all cases OP will be either EQ or NE, but we don't need to know which
7604 here. If we are unable to support a dominance comparison we return
7605 CCmode. This will then fail to match for the RTL expressions that
7606 generate this call. */
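/* For instance (illustrative), with X = (eq (reg r0) (const_int 0)),
   Y = (eq (reg r1) (const_int 0)) and COND_OR == DOM_CC_X_AND_Y, both
   conditions are simple EQ comparisons, so CC_DEQmode is selected
   below.  */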
7607 enum machine_mode
7608 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7610 enum rtx_code cond1, cond2;
7611 int swapped = 0;
7613 /* Currently we will probably get the wrong result if the individual
7614 comparisons are not simple. This also ensures that it is safe to
7615 reverse a comparison if necessary. */
7616 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7617 != CCmode)
7618 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7619 != CCmode))
7620 return CCmode;
7622 /* The if_then_else variant of this tests the second condition if the
7623 first passes, but is true if the first fails. Reverse the first
7624 condition to get a true "inclusive-or" expression. */
7625 if (cond_or == DOM_CC_NX_OR_Y)
7626 cond1 = reverse_condition (cond1);
7628 /* If the comparisons are not equal, and one doesn't dominate the other,
7629 then we can't do this. */
7630 if (cond1 != cond2
7631 && !comparison_dominates_p (cond1, cond2)
7632 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7633 return CCmode;
7635 if (swapped)
7637 enum rtx_code temp = cond1;
7638 cond1 = cond2;
7639 cond2 = temp;
7642 switch (cond1)
7644 case EQ:
7645 if (cond_or == DOM_CC_X_AND_Y)
7646 return CC_DEQmode;
7648 switch (cond2)
7650 case EQ: return CC_DEQmode;
7651 case LE: return CC_DLEmode;
7652 case LEU: return CC_DLEUmode;
7653 case GE: return CC_DGEmode;
7654 case GEU: return CC_DGEUmode;
7655 default: gcc_unreachable ();
7658 case LT:
7659 if (cond_or == DOM_CC_X_AND_Y)
7660 return CC_DLTmode;
7662 switch (cond2)
7664 case LT:
7665 return CC_DLTmode;
7666 case LE:
7667 return CC_DLEmode;
7668 case NE:
7669 return CC_DNEmode;
7670 default:
7671 gcc_unreachable ();
7674 case GT:
7675 if (cond_or == DOM_CC_X_AND_Y)
7676 return CC_DGTmode;
7678 switch (cond2)
7680 case GT:
7681 return CC_DGTmode;
7682 case GE:
7683 return CC_DGEmode;
7684 case NE:
7685 return CC_DNEmode;
7686 default:
7687 gcc_unreachable ();
7690 case LTU:
7691 if (cond_or == DOM_CC_X_AND_Y)
7692 return CC_DLTUmode;
7694 switch (cond2)
7696 case LTU:
7697 return CC_DLTUmode;
7698 case LEU:
7699 return CC_DLEUmode;
7700 case NE:
7701 return CC_DNEmode;
7702 default:
7703 gcc_unreachable ();
7706 case GTU:
7707 if (cond_or == DOM_CC_X_AND_Y)
7708 return CC_DGTUmode;
7710 switch (cond2)
7712 case GTU:
7713 return CC_DGTUmode;
7714 case GEU:
7715 return CC_DGEUmode;
7716 case NE:
7717 return CC_DNEmode;
7718 default:
7719 gcc_unreachable ();
7722 /* The remaining cases only occur when both comparisons are the
7723 same. */
7724 case NE:
7725 gcc_assert (cond1 == cond2);
7726 return CC_DNEmode;
7728 case LE:
7729 gcc_assert (cond1 == cond2);
7730 return CC_DLEmode;
7732 case GE:
7733 gcc_assert (cond1 == cond2);
7734 return CC_DGEmode;
7736 case LEU:
7737 gcc_assert (cond1 == cond2);
7738 return CC_DLEUmode;
7740 case GEU:
7741 gcc_assert (cond1 == cond2);
7742 return CC_DGEUmode;
7744 default:
7745 gcc_unreachable ();
7749 enum machine_mode
7750 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7752 /* All floating point compares return CCFP if it is an equality
7753 comparison, and CCFPE otherwise. */
7754 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7756 switch (op)
7758 case EQ:
7759 case NE:
7760 case UNORDERED:
7761 case ORDERED:
7762 case UNLT:
7763 case UNLE:
7764 case UNGT:
7765 case UNGE:
7766 case UNEQ:
7767 case LTGT:
7768 return CCFPmode;
7770 case LT:
7771 case LE:
7772 case GT:
7773 case GE:
7774 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7775 return CCFPmode;
7776 return CCFPEmode;
7778 default:
7779 gcc_unreachable ();
7783 /* A compare with a shifted operand. Because of canonicalization, the
7784 comparison will have to be swapped when we emit the assembler. */
7785 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7786 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7787 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7788 || GET_CODE (x) == ROTATERT))
7789 return CC_SWPmode;
7791 /* This operation is performed swapped, but since we only rely on the Z
7792 flag we don't need an additional mode. */
7793 if (GET_MODE (y) == SImode && REG_P (y)
7794 && GET_CODE (x) == NEG
7795 && (op == EQ || op == NE))
7796 return CC_Zmode;
7798 /* This is a special case that is used by combine to allow a
7799 comparison of a shifted byte load to be split into a zero-extend
7800 followed by a comparison of the shifted integer (only valid for
7801 equalities and unsigned inequalities). */
7802 if (GET_MODE (x) == SImode
7803 && GET_CODE (x) == ASHIFT
7804 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7805 && GET_CODE (XEXP (x, 0)) == SUBREG
7806 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7807 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7808 && (op == EQ || op == NE
7809 || op == GEU || op == GTU || op == LTU || op == LEU)
7810 && GET_CODE (y) == CONST_INT)
7811 return CC_Zmode;
7813 /* A construct for a conditional compare, if the false arm contains
7814 0, then both conditions must be true, otherwise either condition
7815 must be true. Not all conditions are possible, so CCmode is
7816 returned if it can't be done. */
7817 if (GET_CODE (x) == IF_THEN_ELSE
7818 && (XEXP (x, 2) == const0_rtx
7819 || XEXP (x, 2) == const1_rtx)
7820 && COMPARISON_P (XEXP (x, 0))
7821 && COMPARISON_P (XEXP (x, 1)))
7822 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7823 INTVAL (XEXP (x, 2)));
7825 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7826 if (GET_CODE (x) == AND
7827 && COMPARISON_P (XEXP (x, 0))
7828 && COMPARISON_P (XEXP (x, 1)))
7829 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7830 DOM_CC_X_AND_Y);
7832 if (GET_CODE (x) == IOR
7833 && COMPARISON_P (XEXP (x, 0))
7834 && COMPARISON_P (XEXP (x, 1)))
7835 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7836 DOM_CC_X_OR_Y);
7838 /* An operation (on Thumb) where we want to test for a single bit.
7839 This is done by shifting that bit up into the top bit of a
7840 scratch register; we can then branch on the sign bit. */
7841 if (TARGET_THUMB1
7842 && GET_MODE (x) == SImode
7843 && (op == EQ || op == NE)
7844 && GET_CODE (x) == ZERO_EXTRACT
7845 && XEXP (x, 1) == const1_rtx)
7846 return CC_Nmode;
7848 /* For an operation that sets the condition codes as a side-effect, the
7849 V flag is not set correctly, so we can only use comparisons where
7850 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7851 instead.) */
7852 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7853 if (GET_MODE (x) == SImode
7854 && y == const0_rtx
7855 && (op == EQ || op == NE || op == LT || op == GE)
7856 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7857 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7858 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7859 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7860 || GET_CODE (x) == LSHIFTRT
7861 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7862 || GET_CODE (x) == ROTATERT
7863 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7864 return CC_NOOVmode;
7866 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7867 return CC_Zmode;
7869 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7870 && GET_CODE (x) == PLUS
7871 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7872 return CC_Cmode;
7874 return CCmode;
7877 /* X and Y are two things to compare using CODE. Emit the compare insn and
7878 return the rtx for register 0 in the proper mode. FP means this is a
7879 floating point compare: I don't think that it is needed on the arm. */
7881 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7883 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7884 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7886 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7888 return cc_reg;
7891 /* Generate a sequence of insns that will generate the correct return
7892 address mask depending on the physical architecture that the program
7893 is running on. */
7895 arm_gen_return_addr_mask (void)
7897 rtx reg = gen_reg_rtx (Pmode);
7899 emit_insn (gen_return_addr_mask (reg));
7900 return reg;
7903 void
7904 arm_reload_in_hi (rtx *operands)
7906 rtx ref = operands[1];
7907 rtx base, scratch;
7908 HOST_WIDE_INT offset = 0;
7910 if (GET_CODE (ref) == SUBREG)
7912 offset = SUBREG_BYTE (ref);
7913 ref = SUBREG_REG (ref);
7916 if (GET_CODE (ref) == REG)
7918 /* We have a pseudo which has been spilt onto the stack; there
7919 are two cases here: the first where there is a simple
7920 stack-slot replacement and a second where the stack-slot is
7921 out of range, or is used as a subreg. */
7922 if (reg_equiv_mem[REGNO (ref)])
7924 ref = reg_equiv_mem[REGNO (ref)];
7925 base = find_replacement (&XEXP (ref, 0));
7927 else
7928 /* The slot is out of range, or was dressed up in a SUBREG. */
7929 base = reg_equiv_address[REGNO (ref)];
7931 else
7932 base = find_replacement (&XEXP (ref, 0));
7934 /* Handle the case where the address is too complex to be offset by 1. */
7935 if (GET_CODE (base) == MINUS
7936 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7938 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7940 emit_set_insn (base_plus, base);
7941 base = base_plus;
7943 else if (GET_CODE (base) == PLUS)
7945 /* The addend must be CONST_INT, or we would have dealt with it above. */
7946 HOST_WIDE_INT hi, lo;
7948 offset += INTVAL (XEXP (base, 1));
7949 base = XEXP (base, 0);
7951 /* Rework the address into a legal sequence of insns. */
7952 /* Valid range for lo is -4095 -> 4095 */
7953 lo = (offset >= 0
7954 ? (offset & 0xfff)
7955 : -((-offset) & 0xfff));
7957 /* Corner case, if lo is the max offset then we would be out of range
7958 once we have added the additional 1 below, so bump the msb into the
7959 pre-loading insn(s). */
7960 if (lo == 4095)
7961 lo &= 0x7ff;
7963 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7964 ^ (HOST_WIDE_INT) 0x80000000)
7965 - (HOST_WIDE_INT) 0x80000000);
7967 gcc_assert (hi + lo == offset);
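/* A worked example of the split above (illustrative values only):
   offset = 4100 gives lo = 4100 & 0xfff = 4 and hi = 4096, so hi + lo
   == 4100; offset = -4100 gives lo = -4 and hi = -4096.  The
   xor/subtract pair simply sign-extends (offset - lo) from 32 bits on
   hosts where HOST_WIDE_INT is wider.  */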
7969 if (hi != 0)
7971 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7973 /* Get the base address; addsi3 knows how to handle constants
7974 that require more than one insn. */
7975 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7976 base = base_plus;
7977 offset = lo;
7981 /* Operands[2] may overlap operands[0] (though it won't overlap
7982 operands[1]); that's why we asked for a DImode reg -- so we can
7983 use the bit that does not overlap. */
7984 if (REGNO (operands[2]) == REGNO (operands[0]))
7985 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7986 else
7987 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7989 emit_insn (gen_zero_extendqisi2 (scratch,
7990 gen_rtx_MEM (QImode,
7991 plus_constant (base,
7992 offset))));
7993 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7994 gen_rtx_MEM (QImode,
7995 plus_constant (base,
7996 offset + 1))));
7997 if (!BYTES_BIG_ENDIAN)
7998 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7999 gen_rtx_IOR (SImode,
8000 gen_rtx_ASHIFT
8001 (SImode,
8002 gen_rtx_SUBREG (SImode, operands[0], 0),
8003 GEN_INT (8)),
8004 scratch));
8005 else
8006 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
8007 gen_rtx_IOR (SImode,
8008 gen_rtx_ASHIFT (SImode, scratch,
8009 GEN_INT (8)),
8010 gen_rtx_SUBREG (SImode, operands[0], 0)));
8013 /* Handle storing a half-word to memory during reload by synthesizing as two
8014 byte stores. Take care not to clobber the input values until after we
8015 have moved them somewhere safe. This code assumes that if the DImode
8016 scratch in operands[2] overlaps either the input value or output address
8017 in some way, then that value must die in this insn (we absolutely need
8018 two scratch registers for some corner cases). */
8019 void
8020 arm_reload_out_hi (rtx *operands)
8022 rtx ref = operands[0];
8023 rtx outval = operands[1];
8024 rtx base, scratch;
8025 HOST_WIDE_INT offset = 0;
8027 if (GET_CODE (ref) == SUBREG)
8029 offset = SUBREG_BYTE (ref);
8030 ref = SUBREG_REG (ref);
8033 if (GET_CODE (ref) == REG)
8035 /* We have a pseudo which has been spilt onto the stack; there
8036 are two cases here: the first where there is a simple
8037 stack-slot replacement and a second where the stack-slot is
8038 out of range, or is used as a subreg. */
8039 if (reg_equiv_mem[REGNO (ref)])
8041 ref = reg_equiv_mem[REGNO (ref)];
8042 base = find_replacement (&XEXP (ref, 0));
8044 else
8045 /* The slot is out of range, or was dressed up in a SUBREG. */
8046 base = reg_equiv_address[REGNO (ref)];
8048 else
8049 base = find_replacement (&XEXP (ref, 0));
8051 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8053 /* Handle the case where the address is too complex to be offset by 1. */
8054 if (GET_CODE (base) == MINUS
8055 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8057 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8059 /* Be careful not to destroy OUTVAL. */
8060 if (reg_overlap_mentioned_p (base_plus, outval))
8062 /* Updating base_plus might destroy outval, see if we can
8063 swap the scratch and base_plus. */
8064 if (!reg_overlap_mentioned_p (scratch, outval))
8066 rtx tmp = scratch;
8067 scratch = base_plus;
8068 base_plus = tmp;
8070 else
8072 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8074 /* Be conservative and copy OUTVAL into the scratch now,
8075 this should only be necessary if outval is a subreg
8076 of something larger than a word. */
8077 /* XXX Might this clobber base? I can't see how it can,
8078 since scratch is known to overlap with OUTVAL, and
8079 must be wider than a word. */
8080 emit_insn (gen_movhi (scratch_hi, outval));
8081 outval = scratch_hi;
8085 emit_set_insn (base_plus, base);
8086 base = base_plus;
8088 else if (GET_CODE (base) == PLUS)
8090 /* The addend must be CONST_INT, or we would have dealt with it above. */
8091 HOST_WIDE_INT hi, lo;
8093 offset += INTVAL (XEXP (base, 1));
8094 base = XEXP (base, 0);
8096 /* Rework the address into a legal sequence of insns. */
8097 /* Valid range for lo is -4095 -> 4095 */
8098 lo = (offset >= 0
8099 ? (offset & 0xfff)
8100 : -((-offset) & 0xfff));
8102 /* Corner case, if lo is the max offset then we would be out of range
8103 once we have added the additional 1 below, so bump the msb into the
8104 pre-loading insn(s). */
8105 if (lo == 4095)
8106 lo &= 0x7ff;
8108 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8109 ^ (HOST_WIDE_INT) 0x80000000)
8110 - (HOST_WIDE_INT) 0x80000000);
8112 gcc_assert (hi + lo == offset);
8114 if (hi != 0)
8116 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8118 /* Be careful not to destroy OUTVAL. */
8119 if (reg_overlap_mentioned_p (base_plus, outval))
8121 /* Updating base_plus might destroy outval, see if we
8122 can swap the scratch and base_plus. */
8123 if (!reg_overlap_mentioned_p (scratch, outval))
8125 rtx tmp = scratch;
8126 scratch = base_plus;
8127 base_plus = tmp;
8129 else
8131 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8133 /* Be conservative and copy outval into scratch now,
8134 this should only be necessary if outval is a
8135 subreg of something larger than a word. */
8136 /* XXX Might this clobber base? I can't see how it
8137 can, since scratch is known to overlap with
8138 outval. */
8139 emit_insn (gen_movhi (scratch_hi, outval));
8140 outval = scratch_hi;
8144 /* Get the base address; addsi3 knows how to handle constants
8145 that require more than one insn. */
8146 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8147 base = base_plus;
8148 offset = lo;
8152 if (BYTES_BIG_ENDIAN)
8154 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8155 plus_constant (base, offset + 1)),
8156 gen_lowpart (QImode, outval)));
8157 emit_insn (gen_lshrsi3 (scratch,
8158 gen_rtx_SUBREG (SImode, outval, 0),
8159 GEN_INT (8)));
8160 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8161 gen_lowpart (QImode, scratch)));
8163 else
8165 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8166 gen_lowpart (QImode, outval)));
8167 emit_insn (gen_lshrsi3 (scratch,
8168 gen_rtx_SUBREG (SImode, outval, 0),
8169 GEN_INT (8)));
8170 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8171 plus_constant (base, offset + 1)),
8172 gen_lowpart (QImode, scratch)));
8176 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8177 (padded to the size of a word) should be passed in a register. */
8179 static bool
8180 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8182 if (TARGET_AAPCS_BASED)
8183 return must_pass_in_stack_var_size (mode, type);
8184 else
8185 return must_pass_in_stack_var_size_or_pad (mode, type);
8189 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8190 Return true if an argument passed on the stack should be padded upwards,
8191 i.e. if the least-significant byte has useful data.
8192 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8193 aggregate types are placed in the lowest memory address. */
8195 bool
8196 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8198 if (!TARGET_AAPCS_BASED)
8199 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8201 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8202 return false;
8204 return true;
8208 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8209 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8210 byte of the register has useful data, and return the opposite if the
8211 most significant byte does.
8212 For AAPCS, small aggregates and small complex types are always padded
8213 upwards. */
8215 bool
8216 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8217 tree type, int first ATTRIBUTE_UNUSED)
8219 if (TARGET_AAPCS_BASED
8220 && BYTES_BIG_ENDIAN
8221 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8222 && int_size_in_bytes (type) <= 4)
8223 return true;
8225 /* Otherwise, use default padding. */
8226 return !BYTES_BIG_ENDIAN;
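/* For instance (illustrative, derived from the rules above): on a
   big-endian AAPCS target a 3-byte structure or a small complex value
   of at most 4 bytes is padded upward, while the non-AAPCS big-endian
   default is downward padding.  */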
8230 /* Print a symbolic form of X to the debug file, F. */
8231 static void
8232 arm_print_value (FILE *f, rtx x)
8234 switch (GET_CODE (x))
8236 case CONST_INT:
8237 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8238 return;
8240 case CONST_DOUBLE:
8241 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8242 return;
8244 case CONST_VECTOR:
8246 int i;
8248 fprintf (f, "<");
8249 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8251 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8252 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8253 fputc (',', f);
8255 fprintf (f, ">");
8257 return;
8259 case CONST_STRING:
8260 fprintf (f, "\"%s\"", XSTR (x, 0));
8261 return;
8263 case SYMBOL_REF:
8264 fprintf (f, "`%s'", XSTR (x, 0));
8265 return;
8267 case LABEL_REF:
8268 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8269 return;
8271 case CONST:
8272 arm_print_value (f, XEXP (x, 0));
8273 return;
8275 case PLUS:
8276 arm_print_value (f, XEXP (x, 0));
8277 fprintf (f, "+");
8278 arm_print_value (f, XEXP (x, 1));
8279 return;
8281 case PC:
8282 fprintf (f, "pc");
8283 return;
8285 default:
8286 fprintf (f, "????");
8287 return;
8291 /* Routines for manipulation of the constant pool. */
8293 /* Arm instructions cannot load a large constant directly into a
8294 register; they have to come from a pc relative load. The constant
8295 must therefore be placed in the addressable range of the pc
8296 relative load. Depending on the precise pc relative load
8297 instruction the range is somewhere between 256 bytes and 4k. This
8298 means that we often have to dump a constant inside a function, and
8299 generate code to branch around it.
8301 It is important to minimize this, since the branches will slow
8302 things down and make the code larger.
8304 Normally we can hide the table after an existing unconditional
8305 branch so that there is no interruption of the flow, but in the
8306 worst case the code looks like this:
8308 ldr rn, L1
8310 b L2
8311 align
8312 L1: .long value
8316 ldr rn, L3
8318 b L4
8319 align
8320 L3: .long value
8324 We fix this by performing a scan after scheduling, which notices
8325 which instructions need to have their operands fetched from the
8326 constant table and builds the table.
8328 The algorithm starts by building a table of all the constants that
8329 need fixing up and all the natural barriers in the function (places
8330 where a constant table can be dropped without breaking the flow).
8331 For each fixup we note how far the pc-relative replacement will be
8332 able to reach and the offset of the instruction into the function.
8334 Having built the table we then group the fixes together to form
8335 tables that are as large as possible (subject to addressing
8336 constraints) and emit each table of constants after the last
8337 barrier that is within range of all the instructions in the group.
8338 If a group does not contain a barrier, then we forcibly create one
8339 by inserting a jump instruction into the flow. Once the table has
8340 been inserted, the insns are then modified to reference the
8341 relevant entry in the pool.
8343 Possible enhancements to the algorithm (not implemented) are:
8345 1) For some processors and object formats, there may be benefit in
8346 aligning the pools to the start of cache lines; this alignment
8347 would need to be taken into account when calculating addressability
8348 of a pool. */
8350 /* These typedefs are located at the start of this file, so that
8351 they can be used in the prototypes there. This comment is to
8352 remind readers of that fact so that the following structures
8353 can be understood more easily.
8355 typedef struct minipool_node Mnode;
8356 typedef struct minipool_fixup Mfix; */
8358 struct minipool_node
8360 /* Doubly linked chain of entries. */
8361 Mnode * next;
8362 Mnode * prev;
8363 /* The maximum offset into the code at which this entry can be placed. While
8364 pushing fixes for forward references, all entries are sorted in order
8365 of increasing max_address. */
8366 HOST_WIDE_INT max_address;
8367 /* Similarly for an entry inserted for a backwards ref. */
8368 HOST_WIDE_INT min_address;
8369 /* The number of fixes referencing this entry. This can become zero
8370 if we "unpush" an entry. In this case we ignore the entry when we
8371 come to emit the code. */
8372 int refcount;
8373 /* The offset from the start of the minipool. */
8374 HOST_WIDE_INT offset;
8375 /* The value in the table. */
8376 rtx value;
8377 /* The mode of value. */
8378 enum machine_mode mode;
8379 /* The size of the value. With iWMMXt enabled
8380 sizes > 4 also imply an alignment of 8-bytes. */
8381 int fix_size;
8384 struct minipool_fixup
8386 Mfix * next;
8387 rtx insn;
8388 HOST_WIDE_INT address;
8389 rtx * loc;
8390 enum machine_mode mode;
8391 int fix_size;
8392 rtx value;
8393 Mnode * minipool;
8394 HOST_WIDE_INT forwards;
8395 HOST_WIDE_INT backwards;
8398 /* Fixes less than a word need padding out to a word boundary. */
8399 #define MINIPOOL_FIX_SIZE(mode) \
8400 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
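/* For example (illustrative): a HImode fix (2 bytes) is counted as 4
   bytes by MINIPOOL_FIX_SIZE, while SImode stays at 4 and DImode at 8.  */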
8402 static Mnode * minipool_vector_head;
8403 static Mnode * minipool_vector_tail;
8404 static rtx minipool_vector_label;
8405 static int minipool_pad;
8407 /* The linked list of all minipool fixes required for this function. */
8408 Mfix * minipool_fix_head;
8409 Mfix * minipool_fix_tail;
8410 /* The fix entry for the current minipool, once it has been placed. */
8411 Mfix * minipool_barrier;
8413 /* Determines if INSN is the start of a jump table. Returns the end
8414 of the TABLE or NULL_RTX. */
8415 static rtx
8416 is_jump_table (rtx insn)
8418 rtx table;
8420 if (GET_CODE (insn) == JUMP_INSN
8421 && JUMP_LABEL (insn) != NULL
8422 && ((table = next_real_insn (JUMP_LABEL (insn)))
8423 == next_real_insn (insn))
8424 && table != NULL
8425 && GET_CODE (table) == JUMP_INSN
8426 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8427 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8428 return table;
8430 return NULL_RTX;
8433 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8434 #define JUMP_TABLES_IN_TEXT_SECTION 0
8435 #endif
8437 static HOST_WIDE_INT
8438 get_jump_table_size (rtx insn)
8440 /* ADDR_VECs only take room if read-only data goes into the text
8441 section. */
8442 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8444 rtx body = PATTERN (insn);
8445 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8446 HOST_WIDE_INT size;
8447 HOST_WIDE_INT modesize;
8449 modesize = GET_MODE_SIZE (GET_MODE (body));
8450 size = modesize * XVECLEN (body, elt);
8451 switch (modesize)
8453 case 1:
8454 /* Round up size of TBB table to a halfword boundary. */
8455 size = (size + 1) & ~(HOST_WIDE_INT)1;
8456 break;
8457 case 2:
8458 /* No padding necessary for TBH. */
8459 break;
8460 case 4:
8461 /* Add two bytes for alignment on Thumb. */
8462 if (TARGET_THUMB)
8463 size += 2;
8464 break;
8465 default:
8466 gcc_unreachable ();
8468 return size;
8471 return 0;
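/* Worked examples for the sizing above (illustrative): a TBB-style
   ADDR_DIFF_VEC of 5 QImode entries occupies 5 bytes, rounded up to 6;
   a 3-entry SImode vector is 12 bytes, plus 2 bytes of alignment
   padding on Thumb, giving 14.  */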
8474 /* Move a minipool fix MP from its current location to before MAX_MP.
8475 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8476 constraints may need updating. */
8477 static Mnode *
8478 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8479 HOST_WIDE_INT max_address)
8481 /* The code below assumes these are different. */
8482 gcc_assert (mp != max_mp);
8484 if (max_mp == NULL)
8486 if (max_address < mp->max_address)
8487 mp->max_address = max_address;
8489 else
8491 if (max_address > max_mp->max_address - mp->fix_size)
8492 mp->max_address = max_mp->max_address - mp->fix_size;
8493 else
8494 mp->max_address = max_address;
8496 /* Unlink MP from its current position. Since max_mp is non-null,
8497 mp->prev must be non-null. */
8498 mp->prev->next = mp->next;
8499 if (mp->next != NULL)
8500 mp->next->prev = mp->prev;
8501 else
8502 minipool_vector_tail = mp->prev;
8504 /* Re-insert it before MAX_MP. */
8505 mp->next = max_mp;
8506 mp->prev = max_mp->prev;
8507 max_mp->prev = mp;
8509 if (mp->prev != NULL)
8510 mp->prev->next = mp;
8511 else
8512 minipool_vector_head = mp;
8515 /* Save the new entry. */
8516 max_mp = mp;
8518 /* Scan over the preceding entries and adjust their addresses as
8519 required. */
8520 while (mp->prev != NULL
8521 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8523 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8524 mp = mp->prev;
8527 return max_mp;
8530 /* Add a constant to the minipool for a forward reference. Returns the
8531 node added or NULL if the constant will not fit in this pool. */
8532 static Mnode *
8533 add_minipool_forward_ref (Mfix *fix)
8535 /* If set, max_mp is the first pool_entry that has a lower
8536 constraint than the one we are trying to add. */
8537 Mnode * max_mp = NULL;
8538 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8539 Mnode * mp;
8541 /* If the minipool starts before the end of FIX->INSN then this FIX
8542 cannot be placed into the current pool. Furthermore, adding the
8543 new constant pool entry may cause the pool to start FIX_SIZE bytes
8544 earlier. */
8545 if (minipool_vector_head &&
8546 (fix->address + get_attr_length (fix->insn)
8547 >= minipool_vector_head->max_address - fix->fix_size))
8548 return NULL;
8550 /* Scan the pool to see if a constant with the same value has
8551 already been added. While we are doing this, also note the
8552 location where we must insert the constant if it doesn't already
8553 exist. */
8554 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8556 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8557 && fix->mode == mp->mode
8558 && (GET_CODE (fix->value) != CODE_LABEL
8559 || (CODE_LABEL_NUMBER (fix->value)
8560 == CODE_LABEL_NUMBER (mp->value)))
8561 && rtx_equal_p (fix->value, mp->value))
8563 /* More than one fix references this entry. */
8564 mp->refcount++;
8565 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8568 /* Note the insertion point if necessary. */
8569 if (max_mp == NULL
8570 && mp->max_address > max_address)
8571 max_mp = mp;
8573 /* If we are inserting an 8-byte aligned quantity and
8574 we have not already found an insertion point, then
8575 make sure that all such 8-byte aligned quantities are
8576 placed at the start of the pool. */
8577 if (ARM_DOUBLEWORD_ALIGN
8578 && max_mp == NULL
8579 && fix->fix_size >= 8
8580 && mp->fix_size < 8)
8582 max_mp = mp;
8583 max_address = mp->max_address;
8587 /* The value is not currently in the minipool, so we need to create
8588 a new entry for it. If MAX_MP is NULL, the entry will be put on
8589 the end of the list since the placement is less constrained than
8590 any existing entry. Otherwise, we insert the new fix before
8591 MAX_MP and, if necessary, adjust the constraints on the other
8592 entries. */
8593 mp = XNEW (Mnode);
8594 mp->fix_size = fix->fix_size;
8595 mp->mode = fix->mode;
8596 mp->value = fix->value;
8597 mp->refcount = 1;
8598 /* Not yet required for a backwards ref. */
8599 mp->min_address = -65536;
8601 if (max_mp == NULL)
8603 mp->max_address = max_address;
8604 mp->next = NULL;
8605 mp->prev = minipool_vector_tail;
8607 if (mp->prev == NULL)
8609 minipool_vector_head = mp;
8610 minipool_vector_label = gen_label_rtx ();
8612 else
8613 mp->prev->next = mp;
8615 minipool_vector_tail = mp;
8617 else
8619 if (max_address > max_mp->max_address - mp->fix_size)
8620 mp->max_address = max_mp->max_address - mp->fix_size;
8621 else
8622 mp->max_address = max_address;
8624 mp->next = max_mp;
8625 mp->prev = max_mp->prev;
8626 max_mp->prev = mp;
8627 if (mp->prev != NULL)
8628 mp->prev->next = mp;
8629 else
8630 minipool_vector_head = mp;
8633 /* Save the new entry. */
8634 max_mp = mp;
8636 /* Scan over the preceding entries and adjust their addresses as
8637 required. */
8638 while (mp->prev != NULL
8639 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8641 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8642 mp = mp->prev;
8645 return max_mp;
8648 static Mnode *
8649 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8650 HOST_WIDE_INT min_address)
8652 HOST_WIDE_INT offset;
8654 /* The code below assumes these are different. */
8655 gcc_assert (mp != min_mp);
8657 if (min_mp == NULL)
8659 if (min_address > mp->min_address)
8660 mp->min_address = min_address;
8662 else
8664 /* We will adjust this below if it is too loose. */
8665 mp->min_address = min_address;
8667 /* Unlink MP from its current position. Since min_mp is non-null,
8668 mp->next must be non-null. */
8669 mp->next->prev = mp->prev;
8670 if (mp->prev != NULL)
8671 mp->prev->next = mp->next;
8672 else
8673 minipool_vector_head = mp->next;
8675 /* Reinsert it after MIN_MP. */
8676 mp->prev = min_mp;
8677 mp->next = min_mp->next;
8678 min_mp->next = mp;
8679 if (mp->next != NULL)
8680 mp->next->prev = mp;
8681 else
8682 minipool_vector_tail = mp;
8685 min_mp = mp;
8687 offset = 0;
8688 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8690 mp->offset = offset;
8691 if (mp->refcount > 0)
8692 offset += mp->fix_size;
8694 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8695 mp->next->min_address = mp->min_address + mp->fix_size;
8698 return min_mp;
8701 /* Add a constant to the minipool for a backward reference. Returns the
8702 node added or NULL if the constant will not fit in this pool.
8704 Note that the code for insertion for a backwards reference can be
8705 somewhat confusing because the calculated offsets for each fix do
8706 not take into account the size of the pool (which is still under
8707 construction). */
8708 static Mnode *
8709 add_minipool_backward_ref (Mfix *fix)
8711 /* If set, min_mp is the last pool_entry that has a lower constraint
8712 than the one we are trying to add. */
8713 Mnode *min_mp = NULL;
8714 /* This can be negative, since it is only a constraint. */
8715 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8716 Mnode *mp;
8718 /* If we can't reach the current pool from this insn, or if we can't
8719 insert this entry at the end of the pool without pushing other
8720 fixes out of range, then we don't try. This ensures that we
8721 can't fail later on. */
8722 if (min_address >= minipool_barrier->address
8723 || (minipool_vector_tail->min_address + fix->fix_size
8724 >= minipool_barrier->address))
8725 return NULL;
8727 /* Scan the pool to see if a constant with the same value has
8728 already been added. While we are doing this, also note the
8729 location where we must insert the constant if it doesn't already
8730 exist. */
8731 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8733 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8734 && fix->mode == mp->mode
8735 && (GET_CODE (fix->value) != CODE_LABEL
8736 || (CODE_LABEL_NUMBER (fix->value)
8737 == CODE_LABEL_NUMBER (mp->value)))
8738 && rtx_equal_p (fix->value, mp->value)
8739 /* Check that there is enough slack to move this entry to the
8740 end of the table (this is conservative). */
8741 && (mp->max_address
8742 > (minipool_barrier->address
8743 + minipool_vector_tail->offset
8744 + minipool_vector_tail->fix_size)))
8746 mp->refcount++;
8747 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8750 if (min_mp != NULL)
8751 mp->min_address += fix->fix_size;
8752 else
8754 /* Note the insertion point if necessary. */
8755 if (mp->min_address < min_address)
8757 /* For now, we do not allow the insertion of 8-byte alignment
8758 requiring nodes anywhere but at the start of the pool. */
8759 if (ARM_DOUBLEWORD_ALIGN
8760 && fix->fix_size >= 8 && mp->fix_size < 8)
8761 return NULL;
8762 else
8763 min_mp = mp;
8765 else if (mp->max_address
8766 < minipool_barrier->address + mp->offset + fix->fix_size)
8768 /* Inserting before this entry would push the fix beyond
8769 its maximum address (which can happen if we have
8770 re-located a forwards fix); force the new fix to come
8771 after it. */
8772 min_mp = mp;
8773 min_address = mp->min_address + fix->fix_size;
8775 /* If we are inserting an 8-byte aligned quantity and
8776 we have not already found an insertion point, then
8777 make sure that all such 8-byte aligned quantities are
8778 placed at the start of the pool. */
8779 else if (ARM_DOUBLEWORD_ALIGN
8780 && min_mp == NULL
8781 && fix->fix_size >= 8
8782 && mp->fix_size < 8)
8784 min_mp = mp;
8785 min_address = mp->min_address + fix->fix_size;
8790 /* We need to create a new entry. */
8791 mp = XNEW (Mnode);
8792 mp->fix_size = fix->fix_size;
8793 mp->mode = fix->mode;
8794 mp->value = fix->value;
8795 mp->refcount = 1;
8796 mp->max_address = minipool_barrier->address + 65536;
8798 mp->min_address = min_address;
8800 if (min_mp == NULL)
8802 mp->prev = NULL;
8803 mp->next = minipool_vector_head;
8805 if (mp->next == NULL)
8807 minipool_vector_tail = mp;
8808 minipool_vector_label = gen_label_rtx ();
8810 else
8811 mp->next->prev = mp;
8813 minipool_vector_head = mp;
8815 else
8817 mp->next = min_mp->next;
8818 mp->prev = min_mp;
8819 min_mp->next = mp;
8821 if (mp->next != NULL)
8822 mp->next->prev = mp;
8823 else
8824 minipool_vector_tail = mp;
8827 /* Save the new entry. */
8828 min_mp = mp;
8830 if (mp->prev)
8831 mp = mp->prev;
8832 else
8833 mp->offset = 0;
8835 /* Scan over the following entries and adjust their offsets. */
8836 while (mp->next != NULL)
8838 if (mp->next->min_address < mp->min_address + mp->fix_size)
8839 mp->next->min_address = mp->min_address + mp->fix_size;
8841 if (mp->refcount)
8842 mp->next->offset = mp->offset + mp->fix_size;
8843 else
8844 mp->next->offset = mp->offset;
8846 mp = mp->next;
8849 return min_mp;
8852 static void
8853 assign_minipool_offsets (Mfix *barrier)
8855 HOST_WIDE_INT offset = 0;
8856 Mnode *mp;
8858 minipool_barrier = barrier;
8860 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8862 mp->offset = offset;
8864 if (mp->refcount > 0)
8865 offset += mp->fix_size;
8869 /* Output the literal table */
8870 static void
8871 dump_minipool (rtx scan)
8873 Mnode * mp;
8874 Mnode * nmp;
8875 int align64 = 0;
8877 if (ARM_DOUBLEWORD_ALIGN)
8878 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8879 if (mp->refcount > 0 && mp->fix_size >= 8)
8881 align64 = 1;
8882 break;
8885 if (dump_file)
8886 fprintf (dump_file,
8887 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8888 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8890 scan = emit_label_after (gen_label_rtx (), scan);
8891 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8892 scan = emit_label_after (minipool_vector_label, scan);
8894 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8896 if (mp->refcount > 0)
8898 if (dump_file)
8900 fprintf (dump_file,
8901 ";; Offset %u, min %ld, max %ld ",
8902 (unsigned) mp->offset, (unsigned long) mp->min_address,
8903 (unsigned long) mp->max_address);
8904 arm_print_value (dump_file, mp->value);
8905 fputc ('\n', dump_file);
8908 switch (mp->fix_size)
8910 #ifdef HAVE_consttable_1
8911 case 1:
8912 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8913 break;
8915 #endif
8916 #ifdef HAVE_consttable_2
8917 case 2:
8918 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8919 break;
8921 #endif
8922 #ifdef HAVE_consttable_4
8923 case 4:
8924 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8925 break;
8927 #endif
8928 #ifdef HAVE_consttable_8
8929 case 8:
8930 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8931 break;
8933 #endif
8934 #ifdef HAVE_consttable_16
8935 case 16:
8936 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8937 break;
8939 #endif
8940 default:
8941 gcc_unreachable ();
8945 nmp = mp->next;
8946 free (mp);
8949 minipool_vector_head = minipool_vector_tail = NULL;
8950 scan = emit_insn_after (gen_consttable_end (), scan);
8951 scan = emit_barrier_after (scan);
8954 /* Return the cost of forcibly inserting a barrier after INSN. */
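/* As a rough illustration of the heuristic below: a JUMP_INSN whose
   next non-note insn is a CODE_LABEL costs 50 - 20 - 10 = 20, an
   ordinary INSN with no following label costs 50, and a CODE_LABEL
   itself always costs 50, since the pool should go before it.  */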
8955 static int
8956 arm_barrier_cost (rtx insn)
8958 /* Basing the location of the pool on the loop depth is preferable,
8959 but at the moment, the basic block information seems to be
8960 corrupt by this stage of the compilation. */
8961 int base_cost = 50;
8962 rtx next = next_nonnote_insn (insn);
8964 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8965 base_cost -= 20;
8967 switch (GET_CODE (insn))
8969 case CODE_LABEL:
8970 /* It will always be better to place the table before the label, rather
8971 than after it. */
8972 return 50;
8974 case INSN:
8975 case CALL_INSN:
8976 return base_cost;
8978 case JUMP_INSN:
8979 return base_cost - 10;
8981 default:
8982 return base_cost + 10;
8986 /* Find the best place in the insn stream in the range
8987 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8988 Create the barrier by inserting a jump and add a new fix entry for
8989 it. */
8990 static Mfix *
8991 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8993 HOST_WIDE_INT count = 0;
8994 rtx barrier;
8995 rtx from = fix->insn;
8996 /* The instruction after which we will insert the jump. */
8997 rtx selected = NULL;
8998 int selected_cost;
8999 /* The address at which the jump instruction will be placed. */
9000 HOST_WIDE_INT selected_address;
9001 Mfix * new_fix;
9002 HOST_WIDE_INT max_count = max_address - fix->address;
9003 rtx label = gen_label_rtx ();
9005 selected_cost = arm_barrier_cost (from);
9006 selected_address = fix->address;
9008 while (from && count < max_count)
9010 rtx tmp;
9011 int new_cost;
9013 /* This code shouldn't have been called if there was a natural barrier
9014 within range. */
9015 gcc_assert (GET_CODE (from) != BARRIER);
9017 /* Count the length of this insn. */
9018 count += get_attr_length (from);
9020 /* If there is a jump table, add its length. */
9021 tmp = is_jump_table (from);
9022 if (tmp != NULL)
9024 count += get_jump_table_size (tmp);
9026 /* Jump tables aren't in a basic block, so base the cost on
9027 the dispatch insn. If we select this location, we will
9028 still put the pool after the table. */
9029 new_cost = arm_barrier_cost (from);
9031 if (count < max_count
9032 && (!selected || new_cost <= selected_cost))
9034 selected = tmp;
9035 selected_cost = new_cost;
9036 selected_address = fix->address + count;
9039 /* Continue after the dispatch table. */
9040 from = NEXT_INSN (tmp);
9041 continue;
9044 new_cost = arm_barrier_cost (from);
9046 if (count < max_count
9047 && (!selected || new_cost <= selected_cost))
9049 selected = from;
9050 selected_cost = new_cost;
9051 selected_address = fix->address + count;
9054 from = NEXT_INSN (from);
9057 /* Make sure that we found a place to insert the jump. */
9058 gcc_assert (selected);
9060 /* Create a new JUMP_INSN that branches around a barrier. */
9061 from = emit_jump_insn_after (gen_jump (label), selected);
9062 JUMP_LABEL (from) = label;
9063 barrier = emit_barrier_after (from);
9064 emit_label_after (label, barrier);
9066 /* Create a minipool barrier entry for the new barrier. */
9067 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9068 new_fix->insn = barrier;
9069 new_fix->address = selected_address;
9070 new_fix->next = fix->next;
9071 fix->next = new_fix;
9073 return new_fix;
9076 /* Record that there is a natural barrier in the insn stream at
9077 ADDRESS. */
9078 static void
9079 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9081 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9083 fix->insn = insn;
9084 fix->address = address;
9086 fix->next = NULL;
9087 if (minipool_fix_head != NULL)
9088 minipool_fix_tail->next = fix;
9089 else
9090 minipool_fix_head = fix;
9092 minipool_fix_tail = fix;
9095 /* Record INSN, which will need fixing up to load a value from the
9096 minipool. ADDRESS is the offset of the insn since the start of the
9097 function; LOC is a pointer to the part of the insn which requires
9098 fixing; VALUE is the constant that must be loaded, which is of type
9099 MODE. */
9100 static void
9101 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9102 enum machine_mode mode, rtx value)
9104 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9106 fix->insn = insn;
9107 fix->address = address;
9108 fix->loc = loc;
9109 fix->mode = mode;
9110 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9111 fix->value = value;
9112 fix->forwards = get_attr_pool_range (insn);
9113 fix->backwards = get_attr_neg_pool_range (insn);
9114 fix->minipool = NULL;
9116 /* If an insn doesn't have a range defined for it, then it isn't
9117 expecting to be reworked by this code. Better to stop now than
9118 to generate duff assembly code. */
9119 gcc_assert (fix->forwards || fix->backwards);
9121 /* If an entry requires 8-byte alignment then assume all constant pools
9122 require 4 bytes of padding. Trying to do this later on a per-pool
9123 basis is awkward because existing pool entries have to be modified. */
9124 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9125 minipool_pad = 4;
9127 if (dump_file)
9129 fprintf (dump_file,
9130 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9131 GET_MODE_NAME (mode),
9132 INSN_UID (insn), (unsigned long) address,
9133 -1 * (long)fix->backwards, (long)fix->forwards);
9134 arm_print_value (dump_file, fix->value);
9135 fprintf (dump_file, "\n");
9138 /* Add it to the chain of fixes. */
9139 fix->next = NULL;
9141 if (minipool_fix_head != NULL)
9142 minipool_fix_tail->next = fix;
9143 else
9144 minipool_fix_head = fix;
9146 minipool_fix_tail = fix;
9149 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9150 Returns the number of insns needed, or 99 if we don't know how to
9151 do it. */
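/* For example (illustrative): the DImode constant 0x0000000100000001
   has low and high SImode parts both equal to 1, each of which is a
   valid immediate, so the inline cost is typically 1 + 1 = 2 insns.  */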
9153 arm_const_double_inline_cost (rtx val)
9155 rtx lowpart, highpart;
9156 enum machine_mode mode;
9158 mode = GET_MODE (val);
9160 if (mode == VOIDmode)
9161 mode = DImode;
9163 gcc_assert (GET_MODE_SIZE (mode) == 8);
9165 lowpart = gen_lowpart (SImode, val);
9166 highpart = gen_highpart_mode (SImode, mode, val);
9168 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9169 gcc_assert (GET_CODE (highpart) == CONST_INT);
9171 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9172 NULL_RTX, NULL_RTX, 0, 0)
9173 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9174 NULL_RTX, NULL_RTX, 0, 0));
9177 /* Return true if it is worthwhile to split a 64-bit constant into two
9178 32-bit operations. This is the case if optimizing for size, or
9179 if we have load delay slots, or if one 32-bit part can be done with
9180 a single data operation. */
9181 bool
9182 arm_const_double_by_parts (rtx val)
9184 enum machine_mode mode = GET_MODE (val);
9185 rtx part;
9187 if (optimize_size || arm_ld_sched)
9188 return true;
9190 if (mode == VOIDmode)
9191 mode = DImode;
9193 part = gen_highpart_mode (SImode, mode, val);
9195 gcc_assert (GET_CODE (part) == CONST_INT);
9197 if (const_ok_for_arm (INTVAL (part))
9198 || const_ok_for_arm (~INTVAL (part)))
9199 return true;
9201 part = gen_lowpart (SImode, val);
9203 gcc_assert (GET_CODE (part) == CONST_INT);
9205 if (const_ok_for_arm (INTVAL (part))
9206 || const_ok_for_arm (~INTVAL (part)))
9207 return true;
9209 return false;
9212 /* Scan INSN and note any of its operands that need fixing.
9213 If DO_PUSHES is false we do not actually push any of the fixups
9214 needed. The function returns TRUE if any fixups were needed/pushed.
9215 This is used by arm_memory_load_p() which needs to know about loads
9216 of constants that will be converted into minipool loads. */
9217 static bool
9218 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9220 bool result = false;
9221 int opno;
9223 extract_insn (insn);
9225 if (!constrain_operands (1))
9226 fatal_insn_not_found (insn);
9228 if (recog_data.n_alternatives == 0)
9229 return false;
9231 /* Fill in recog_op_alt with information about the constraints of
9232 this insn. */
9233 preprocess_constraints ();
9235 for (opno = 0; opno < recog_data.n_operands; opno++)
9237 /* Things we need to fix can only occur in inputs. */
9238 if (recog_data.operand_type[opno] != OP_IN)
9239 continue;
9241 /* If this alternative is a memory reference, then any mention
9242 of constants in this alternative is really to fool reload
9243 into allowing us to accept one there. We need to fix them up
9244 now so that we output the right code. */
9245 if (recog_op_alt[opno][which_alternative].memory_ok)
9247 rtx op = recog_data.operand[opno];
9249 if (CONSTANT_P (op))
9251 if (do_pushes)
9252 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9253 recog_data.operand_mode[opno], op);
9254 result = true;
9256 else if (GET_CODE (op) == MEM
9257 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9258 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9260 if (do_pushes)
9262 rtx cop = avoid_constant_pool_reference (op);
9264 /* Casting the address of something to a mode narrower
9265 than a word can cause avoid_constant_pool_reference()
9266 to return the pool reference itself. That's no good to
9267 us here. Let's just hope that we can use the
9268 constant pool value directly. */
9269 if (op == cop)
9270 cop = get_pool_constant (XEXP (op, 0));
9272 push_minipool_fix (insn, address,
9273 recog_data.operand_loc[opno],
9274 recog_data.operand_mode[opno], cop);
9277 result = true;
9282 return result;
9285 /* Gcc puts the pool in the wrong place for ARM, since we can only
9286 load addresses a limited distance around the pc. We do some
9287 special munging to move the constant pool values to the correct
9288 point in the code. */
9289 static void
9290 arm_reorg (void)
9292 rtx insn;
9293 HOST_WIDE_INT address = 0;
9294 Mfix * fix;
9296 minipool_fix_head = minipool_fix_tail = NULL;
9298 /* The first insn must always be a note, or the code below won't
9299 scan it properly. */
9300 insn = get_insns ();
9301 gcc_assert (GET_CODE (insn) == NOTE);
9302 minipool_pad = 0;
9304 /* Scan all the insns and record the operands that will need fixing. */
9305 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9307 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9308 && (arm_cirrus_insn_p (insn)
9309 || GET_CODE (insn) == JUMP_INSN
9310 || arm_memory_load_p (insn)))
9311 cirrus_reorg (insn);
9313 if (GET_CODE (insn) == BARRIER)
9314 push_minipool_barrier (insn, address);
9315 else if (INSN_P (insn))
9317 rtx table;
9319 note_invalid_constants (insn, address, true);
9320 address += get_attr_length (insn);
9322 /* If the insn is a vector jump, add the size of the table
9323 and skip the table. */
9324 if ((table = is_jump_table (insn)) != NULL)
9326 address += get_jump_table_size (table);
9327 insn = table;
9332 fix = minipool_fix_head;
9334 /* Now scan the fixups and perform the required changes. */
9335 while (fix)
9337 Mfix * ftmp;
9338 Mfix * fdel;
9339 Mfix * last_added_fix;
9340 Mfix * last_barrier = NULL;
9341 Mfix * this_fix;
9343 /* Skip any further barriers before the next fix. */
9344 while (fix && GET_CODE (fix->insn) == BARRIER)
9345 fix = fix->next;
9347 /* No more fixes. */
9348 if (fix == NULL)
9349 break;
9351 last_added_fix = NULL;
9353 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9355 if (GET_CODE (ftmp->insn) == BARRIER)
9357 if (ftmp->address >= minipool_vector_head->max_address)
9358 break;
9360 last_barrier = ftmp;
9362 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9363 break;
9365 last_added_fix = ftmp; /* Keep track of the last fix added. */
9368 /* If we found a barrier, drop back to that; any fixes that we
9369 could have reached but come after the barrier will now go in
9370 the next mini-pool. */
9371 if (last_barrier != NULL)
9373 /* Reduce the refcount for those fixes that won't go into this
9374 pool after all. */
9375 for (fdel = last_barrier->next;
9376 fdel && fdel != ftmp;
9377 fdel = fdel->next)
9379 fdel->minipool->refcount--;
9380 fdel->minipool = NULL;
9383 ftmp = last_barrier;
9385 else
9387 /* ftmp is first fix that we can't fit into this pool and
9388 there are no natural barriers that we could use. Insert a
9389 new barrier in the code somewhere between the previous
9390 fix and this one, and arrange to jump around it. */
9391 HOST_WIDE_INT max_address;
9393 /* The last item on the list of fixes must be a barrier, so
9394 we can never run off the end of the list of fixes without
9395 last_barrier being set. */
9396 gcc_assert (ftmp);
9398 max_address = minipool_vector_head->max_address;
9399 /* Check that there isn't another fix that is in range that
9400 we couldn't fit into this pool because the pool was
9401 already too large: we need to put the pool before such an
9402 instruction. The pool itself may come just after the
9403 fix because create_fix_barrier also allows space for a
9404 jump instruction. */
9405 if (ftmp->address < max_address)
9406 max_address = ftmp->address + 1;
9408 last_barrier = create_fix_barrier (last_added_fix, max_address);
9411 assign_minipool_offsets (last_barrier);
9413 while (ftmp)
9415 if (GET_CODE (ftmp->insn) != BARRIER
9416 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9417 == NULL))
9418 break;
9420 ftmp = ftmp->next;
9423 /* Scan over the fixes we have identified for this pool, fixing them
9424 up and adding the constants to the pool itself. */
9425 for (this_fix = fix; this_fix && ftmp != this_fix;
9426 this_fix = this_fix->next)
9427 if (GET_CODE (this_fix->insn) != BARRIER)
9429 rtx addr
9430 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9431 minipool_vector_label),
9432 this_fix->minipool->offset);
9433 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9436 dump_minipool (last_barrier->insn);
9437 fix = ftmp;
9440 /* From now on we must synthesize any constants that we can't handle
9441 directly. This can happen if the RTL gets split during final
9442 instruction generation. */
9443 after_arm_reorg = 1;
9445 /* Free the minipool memory. */
9446 obstack_free (&minipool_obstack, minipool_startobj);
9449 /* Routines to output assembly language. */
9451 /* If the rtx is the correct value then return the string of the number.
9452 In this way we can ensure that valid double constants are generated even
9453 when cross compiling. */
9454 const char *
9455 fp_immediate_constant (rtx x)
9457 REAL_VALUE_TYPE r;
9458 int i;
9460 if (!fp_consts_inited)
9461 init_fp_table ();
9463 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9464 for (i = 0; i < 8; i++)
9465 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9466 return strings_fp[i];
9468 gcc_unreachable ();
9471 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9472 static const char *
9473 fp_const_from_val (REAL_VALUE_TYPE *r)
9475 int i;
9477 if (!fp_consts_inited)
9478 init_fp_table ();
9480 for (i = 0; i < 8; i++)
9481 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9482 return strings_fp[i];
9484 gcc_unreachable ();
9487 /* Output the operands of a LDM/STM instruction to STREAM.
9488 MASK is the ARM register set mask of which only bits 0-15 are important.
9489 REG is the base register, either the frame pointer or the stack pointer,
9490 INSTR is the possibly suffixed load or store instruction.
9491 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9493 static void
9494 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9495 unsigned long mask, int rfe)
9497 unsigned i;
9498 bool not_first = FALSE;
9500 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9501 fputc ('\t', stream);
9502 asm_fprintf (stream, instr, reg);
9503 fputc ('{', stream);
9505 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9506 if (mask & (1 << i))
9508 if (not_first)
9509 fprintf (stream, ", ");
9511 asm_fprintf (stream, "%r", i);
9512 not_first = TRUE;
9515 if (rfe)
9516 fprintf (stream, "}^\n");
9517 else
9518 fprintf (stream, "}\n");
9522 /* Output a FLDMD instruction to STREAM.
9523 BASE is the register containing the address.
9524 REG and COUNT specify the register range.
9525 Extra registers may be added to avoid hardware bugs.
9527 We output FLDMD even for ARMv5 VFP implementations. Although
9528 FLDMD is technically not supported until ARMv6, it is believed
9529 that all VFP implementations support its use in this context. */
9531 static void
9532 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9534 int i;
9536 /* Workaround ARM10 VFPr1 bug. */
9537 if (count == 2 && !arm_arch6)
9539 if (reg == 15)
9540 reg--;
9541 count++;
9544 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9545 load into multiple parts if we have to handle more than 16 registers. */
9546 if (count > 16)
9548 vfp_output_fldmd (stream, base, reg, 16);
9549 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9550 return;
9553 fputc ('\t', stream);
9554 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9556 for (i = reg; i < reg + count; i++)
9558 if (i > reg)
9559 fputs (", ", stream);
9560 asm_fprintf (stream, "d%d", i);
9562 fputs ("}\n", stream);
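/* As an illustration (not from the original source), a call such as
   vfp_output_fldmd (f, SP_REGNUM, 8, 3) would emit something like
   "fldmfdd sp!, {d8, d9, d10}"; the exact register naming depends on
   the assembler dialect.  */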
9567 /* Output the assembly for a store multiple. */
9569 const char *
9570 vfp_output_fstmd (rtx * operands)
9572 char pattern[100];
9573 int p;
9574 int base;
9575 int i;
9577 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9578 p = strlen (pattern);
9580 gcc_assert (GET_CODE (operands[1]) == REG);
9582 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9583 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9585 p += sprintf (&pattern[p], ", d%d", base + i);
9587 strcpy (&pattern[p], "}");
9589 output_asm_insn (pattern, operands);
9590 return "";
9594 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9595 number of bytes pushed. */
9597 static int
9598 vfp_emit_fstmd (int base_reg, int count)
9600 rtx par;
9601 rtx dwarf;
9602 rtx tmp, reg;
9603 int i;
9605 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9606 register pairs are stored by a store multiple insn. We avoid this
9607 by pushing an extra pair. */
9608 if (count == 2 && !arm_arch6)
9610 if (base_reg == LAST_VFP_REGNUM - 3)
9611 base_reg -= 2;
9612 count++;
9615 /* FSTMD may not store more than 16 doubleword registers at once. Split
9616 larger stores into multiple parts (up to a maximum of two, in
9617 practice). */
9618 if (count > 16)
9620 int saved;
9621 /* NOTE: base_reg is an internal register number, so each D register
9622 counts as 2. */
9623 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9624 saved += vfp_emit_fstmd (base_reg, 16);
9625 return saved;
9628 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9629 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9631 reg = gen_rtx_REG (DFmode, base_reg);
9632 base_reg += 2;
9634 XVECEXP (par, 0, 0)
9635 = gen_rtx_SET (VOIDmode,
9636 gen_frame_mem (BLKmode,
9637 gen_rtx_PRE_DEC (BLKmode,
9638 stack_pointer_rtx)),
9639 gen_rtx_UNSPEC (BLKmode,
9640 gen_rtvec (1, reg),
9641 UNSPEC_PUSH_MULT));
9643 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9644 plus_constant (stack_pointer_rtx, -(count * 8)));
9645 RTX_FRAME_RELATED_P (tmp) = 1;
9646 XVECEXP (dwarf, 0, 0) = tmp;
9648 tmp = gen_rtx_SET (VOIDmode,
9649 gen_frame_mem (DFmode, stack_pointer_rtx),
9650 reg);
9651 RTX_FRAME_RELATED_P (tmp) = 1;
9652 XVECEXP (dwarf, 0, 1) = tmp;
9654 for (i = 1; i < count; i++)
9656 reg = gen_rtx_REG (DFmode, base_reg);
9657 base_reg += 2;
9658 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9660 tmp = gen_rtx_SET (VOIDmode,
9661 gen_frame_mem (DFmode,
9662 plus_constant (stack_pointer_rtx,
9663 i * 8)),
9664 reg);
9665 RTX_FRAME_RELATED_P (tmp) = 1;
9666 XVECEXP (dwarf, 0, i + 1) = tmp;
9669 par = emit_insn (par);
9670 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9671 REG_NOTES (par));
9672 RTX_FRAME_RELATED_P (par) = 1;
9674 return count * 8;
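/* For instance (illustrative): saving four double registers emits a
   single FSTMD of four pairs and returns 4 * 8 = 32 bytes pushed; on
   pre-ARMv6 cores a request for exactly two registers is silently
   widened to three to dodge the ARM10 VFPr1 erratum.  */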
9677 /* Emit a call instruction with pattern PAT. ADDR is the address of
9678 the call target. */
9680 void
9681 arm_emit_call_insn (rtx pat, rtx addr)
9683 rtx insn;
9685 insn = emit_call_insn (pat);
9687 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9688 If the call might use such an entry, add a use of the PIC register
9689 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9690 if (TARGET_VXWORKS_RTP
9691 && flag_pic
9692 && GET_CODE (addr) == SYMBOL_REF
9693 && (SYMBOL_REF_DECL (addr)
9694 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9695 : !SYMBOL_REF_LOCAL_P (addr)))
9697 require_pic_register ();
9698 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9702 /* Output a 'call' insn. */
9703 const char *
9704 output_call (rtx *operands)
9706 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9708 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9709 if (REGNO (operands[0]) == LR_REGNUM)
9711 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9712 output_asm_insn ("mov%?\t%0, %|lr", operands);
9715 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9717 if (TARGET_INTERWORK || arm_arch4t)
9718 output_asm_insn ("bx%?\t%0", operands);
9719 else
9720 output_asm_insn ("mov%?\t%|pc, %0", operands);
9722 return "";
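/* A sketch of the code this emits (illustrative): for a call through r2
   on an interworking or ARMv4T target the sequence is
	mov	lr, pc
	bx	r2
   while plain pre-v4t ARM falls back to "mov pc, r2".  */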
9725 /* Output a 'call' insn that is a reference in memory. */
9726 const char *
9727 output_call_mem (rtx *operands)
9729 if (TARGET_INTERWORK && !arm_arch5)
9731 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9732 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9733 output_asm_insn ("bx%?\t%|ip", operands);
9735 else if (regno_use_in (LR_REGNUM, operands[0]))
9737 /* LR is used in the memory address. We load the address in the
9738 first instruction. It's safe to use IP as the target of the
9739 load since the call will kill it anyway. */
9740 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9741 if (arm_arch5)
9742 output_asm_insn ("blx%?\t%|ip", operands);
9743 else
9745 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9746 if (arm_arch4t)
9747 output_asm_insn ("bx%?\t%|ip", operands);
9748 else
9749 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9752 else
9754 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9755 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9758 return "";
9762 /* Output a move from arm registers to an fpa register.
9763 OPERANDS[0] is an fpa register.
9764 OPERANDS[1] is the first register of an arm register pair. */
9765 const char *
9766 output_mov_long_double_fpa_from_arm (rtx *operands)
9768 int arm_reg0 = REGNO (operands[1]);
9769 rtx ops[3];
9771 gcc_assert (arm_reg0 != IP_REGNUM);
9773 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9774 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9775 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9777 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9778 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9780 return "";
9783 /* Output a move from an fpa register to arm registers.
9784 OPERANDS[0] is the first register of an arm register pair.
9785 OPERANDS[1] is an fpa register. */
9786 const char *
9787 output_mov_long_double_arm_from_fpa (rtx *operands)
9789 int arm_reg0 = REGNO (operands[0]);
9790 rtx ops[3];
9792 gcc_assert (arm_reg0 != IP_REGNUM);
9794 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9795 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9796 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9798 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9799 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9800 return "";
9803 /* Output a move from arm registers to arm registers of a long double.
9804 OPERANDS[0] is the destination.
9805 OPERANDS[1] is the source. */
9806 const char *
9807 output_mov_long_double_arm_from_arm (rtx *operands)
9809 /* We have to be careful here because the two might overlap. */
9810 int dest_start = REGNO (operands[0]);
9811 int src_start = REGNO (operands[1]);
9812 rtx ops[2];
9813 int i;
9815 if (dest_start < src_start)
9817 for (i = 0; i < 3; i++)
9819 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9820 ops[1] = gen_rtx_REG (SImode, src_start + i);
9821 output_asm_insn ("mov%?\t%0, %1", ops);
9824 else
9826 for (i = 2; i >= 0; i--)
9828 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9829 ops[1] = gen_rtx_REG (SImode, src_start + i);
9830 output_asm_insn ("mov%?\t%0, %1", ops);
9834 return "";
9838 /* Output a move from arm registers to an fpa register.
9839 OPERANDS[0] is an fpa register.
9840 OPERANDS[1] is the first register of an arm register pair. */
9841 const char *
9842 output_mov_double_fpa_from_arm (rtx *operands)
9844 int arm_reg0 = REGNO (operands[1]);
9845 rtx ops[2];
9847 gcc_assert (arm_reg0 != IP_REGNUM);
9849 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9850 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9851 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9852 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9853 return "";
9856 /* Output a move from an fpa register to arm registers.
9857 OPERANDS[0] is the first register of an arm register pair.
9858 OPERANDS[1] is an fpa register. */
9859 const char *
9860 output_mov_double_arm_from_fpa (rtx *operands)
9862 int arm_reg0 = REGNO (operands[0]);
9863 rtx ops[2];
9865 gcc_assert (arm_reg0 != IP_REGNUM);
9867 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9868 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9869 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9870 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9871 return "";
9874 /* Output a move between double words.
9875 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9876 or MEM<-REG and all MEMs must be offsettable addresses. */
9877 const char *
9878 output_move_double (rtx *operands)
9880 enum rtx_code code0 = GET_CODE (operands[0]);
9881 enum rtx_code code1 = GET_CODE (operands[1]);
9882 rtx otherops[3];
9884 if (code0 == REG)
9886 int reg0 = REGNO (operands[0]);
9888 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9890 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9892 switch (GET_CODE (XEXP (operands[1], 0)))
9894 case REG:
9895 if (TARGET_LDRD)
9896 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
9897 else
9898 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9899 break;
9901 case PRE_INC:
9902 gcc_assert (TARGET_LDRD);
9903 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9904 break;
9906 case PRE_DEC:
9907 if (TARGET_LDRD)
9908 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9909 else
9910 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9911 break;
9913 case POST_INC:
9914 if (TARGET_LDRD)
9915 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
9916 else
9917 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9918 break;
9920 case POST_DEC:
9921 gcc_assert (TARGET_LDRD);
9922 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9923 break;
9925 case PRE_MODIFY:
9926 case POST_MODIFY:
9927 otherops[0] = operands[0];
9928 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9929 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9931 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9933 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9935 /* Registers overlap so split out the increment. */
9936 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9937 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9939 else
9941 /* IWMMXT allows offsets larger than ldrd can handle;
9942 fix these up with a pair of ldr. */
9943 if (GET_CODE (otherops[2]) == CONST_INT
9944 && (INTVAL(otherops[2]) <= -256
9945 || INTVAL(otherops[2]) >= 256))
9947 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9948 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9949 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9951 else
9952 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9955 else
9957 /* IWMMXT allows offsets larger than ldrd can handle;
9958 fix these up with a pair of ldr instructions. */
9959 if (GET_CODE (otherops[2]) == CONST_INT
9960 && (INTVAL(otherops[2]) <= -256
9961 || INTVAL(otherops[2]) >= 256))
9963 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9964 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9965 otherops[0] = operands[0];
9966 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9968 else
9969 /* We only allow constant increments, so this is safe. */
9970 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9972 break;
9974 case LABEL_REF:
9975 case CONST:
9976 /* We might be able to use ldrd %0, %1 here. However the range is
9977 different to ldr/adr, and it is broken on some ARMv7-M
9978 implementations. */
9979 output_asm_insn ("adr%?\t%0, %1", operands);
9980 if (TARGET_LDRD)
9981 output_asm_insn ("ldr%(d%)\t%0, [%0]", operands);
9982 else
9983 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9984 break;
9986 /* ??? This needs checking for thumb2. */
9987 default:
9988 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9989 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9991 otherops[0] = operands[0];
9992 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9993 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9995 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9997 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
9999 switch ((int) INTVAL (otherops[2]))
10001 case -8:
10002 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
10003 return "";
10004 case -4:
10005 if (TARGET_THUMB2)
10006 break;
10007 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
10008 return "";
10009 case 4:
10010 if (TARGET_THUMB2)
10011 break;
10012 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10013 return "";
10016 if (TARGET_LDRD
10017 && (GET_CODE (otherops[2]) == REG
10018 || (GET_CODE (otherops[2]) == CONST_INT
10019 && INTVAL (otherops[2]) > -256
10020 && INTVAL (otherops[2]) < 256)))
10022 if (reg_overlap_mentioned_p (otherops[0],
10023 otherops[2]))
10025 /* Swap base and index registers over to
10026 avoid a conflict. */
10027 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
10028 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
10030 /* If both registers conflict, it will usually
10031 have been fixed by a splitter. */
10032 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10034 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10035 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10036 otherops);
10038 else
10039 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10040 return "";
10043 if (GET_CODE (otherops[2]) == CONST_INT)
10045 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10046 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10047 else
10048 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10050 else
10051 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10053 else
10054 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10056 if (TARGET_LDRD)
10057 return "ldr%(d%)\t%0, [%0]";
10059 return "ldm%(ia%)\t%0, %M0";
10061 else
10063 otherops[1] = adjust_address (operands[1], SImode, 4);
10064 /* Take care of overlapping base/data reg. */
10065 if (reg_mentioned_p (operands[0], operands[1]))
10067 output_asm_insn ("ldr%?\t%0, %1", otherops);
10068 output_asm_insn ("ldr%?\t%0, %1", operands);
10070 else
10072 output_asm_insn ("ldr%?\t%0, %1", operands);
10073 output_asm_insn ("ldr%?\t%0, %1", otherops);
10078 else
10080 /* Constraints should ensure this. */
10081 gcc_assert (code0 == MEM && code1 == REG);
10082 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10084 switch (GET_CODE (XEXP (operands[0], 0)))
10086 case REG:
10087 if (TARGET_LDRD)
10088 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10089 else
10090 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10091 break;
10093 case PRE_INC:
10094 gcc_assert (TARGET_LDRD);
10095 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10096 break;
10098 case PRE_DEC:
10099 if (TARGET_LDRD)
10100 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10101 else
10102 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10103 break;
10105 case POST_INC:
10106 if (TARGET_LDRD)
10107 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10108 else
10109 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10110 break;
10112 case POST_DEC:
10113 gcc_assert (TARGET_LDRD);
10114 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10115 break;
10117 case PRE_MODIFY:
10118 case POST_MODIFY:
10119 otherops[0] = operands[1];
10120 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10121 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10123 /* IWMMXT allows offsets larger than ldrd can handle;
10124 fix these up with a pair of ldr instructions. */
10125 if (GET_CODE (otherops[2]) == CONST_INT
10126 && (INTVAL(otherops[2]) <= -256
10127 || INTVAL(otherops[2]) >= 256))
10129 rtx reg1;
10130 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10131 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10133 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
10134 otherops[0] = reg1;
10135 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10137 else
10139 otherops[0] = reg1;
10140 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
10141 otherops[0] = operands[1];
10142 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
10145 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10146 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10147 else
10148 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10149 break;
10151 case PLUS:
10152 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10153 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10155 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10157 case -8:
10158 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10159 return "";
10161 case -4:
10162 if (TARGET_THUMB2)
10163 break;
10164 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10165 return "";
10167 case 4:
10168 if (TARGET_THUMB2)
10169 break;
10170 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10171 return "";
10174 if (TARGET_LDRD
10175 && (GET_CODE (otherops[2]) == REG
10176 || (GET_CODE (otherops[2]) == CONST_INT
10177 && INTVAL (otherops[2]) > -256
10178 && INTVAL (otherops[2]) < 256)))
10180 otherops[0] = operands[1];
10181 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10182 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10183 return "";
10185 /* Fall through */
10187 default:
10188 otherops[0] = adjust_address (operands[0], SImode, 4);
10189 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10190 output_asm_insn ("str%?\t%1, %0", operands);
10191 output_asm_insn ("str%?\t%1, %0", otherops);
10195 return "";
10198 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10199 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10201 const char *
10202 output_move_quad (rtx *operands)
10204 if (REG_P (operands[0]))
10206 /* Load, or reg->reg move. */
10208 if (MEM_P (operands[1]))
10210 switch (GET_CODE (XEXP (operands[1], 0)))
10212 case REG:
10213 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10214 break;
10216 case LABEL_REF:
10217 case CONST:
10218 output_asm_insn ("adr%?\t%0, %1", operands);
10219 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10220 break;
10222 default:
10223 gcc_unreachable ();
10226 else
10228 rtx ops[2];
10229 int dest, src, i;
10231 gcc_assert (REG_P (operands[1]));
10233 dest = REGNO (operands[0]);
10234 src = REGNO (operands[1]);
10236 /* This seems pretty dumb, but hopefully GCC won't try to do it
10237 very often. */
10238 if (dest < src)
10239 for (i = 0; i < 4; i++)
10241 ops[0] = gen_rtx_REG (SImode, dest + i);
10242 ops[1] = gen_rtx_REG (SImode, src + i);
10243 output_asm_insn ("mov%?\t%0, %1", ops);
10245 else
10246 for (i = 3; i >= 0; i--)
10248 ops[0] = gen_rtx_REG (SImode, dest + i);
10249 ops[1] = gen_rtx_REG (SImode, src + i);
10250 output_asm_insn ("mov%?\t%0, %1", ops);
10254 else
10256 gcc_assert (MEM_P (operands[0]));
10257 gcc_assert (REG_P (operands[1]));
10258 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10260 switch (GET_CODE (XEXP (operands[0], 0)))
10262 case REG:
10263 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10264 break;
10266 default:
10267 gcc_unreachable ();
10271 return "";
10274 /* Output a VFP load or store instruction. */
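/* A rough sketch of the output, derived from the templates below: a DFmode
   load from [rN] is emitted as "fldd dM, [rN]", an SFmode store as
   "fsts sM, [rN]", and a POST_INC double-word load as
   "fldmiad rN!, {dM}". */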
10276 const char *
10277 output_move_vfp (rtx *operands)
10279 rtx reg, mem, addr, ops[2];
10280 int load = REG_P (operands[0]);
10281 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10282 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10283 const char *template;
10284 char buff[50];
10285 enum machine_mode mode;
10287 reg = operands[!load];
10288 mem = operands[load];
10290 mode = GET_MODE (reg);
10292 gcc_assert (REG_P (reg));
10293 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10294 gcc_assert (mode == SFmode
10295 || mode == DFmode
10296 || mode == SImode
10297 || mode == DImode
10298 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10299 gcc_assert (MEM_P (mem));
10301 addr = XEXP (mem, 0);
10303 switch (GET_CODE (addr))
10305 case PRE_DEC:
10306 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10307 ops[0] = XEXP (addr, 0);
10308 ops[1] = reg;
10309 break;
10311 case POST_INC:
10312 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10313 ops[0] = XEXP (addr, 0);
10314 ops[1] = reg;
10315 break;
10317 default:
10318 template = "f%s%c%%?\t%%%s0, %%1%s";
10319 ops[0] = reg;
10320 ops[1] = mem;
10321 break;
10324 sprintf (buff, template,
10325 load ? "ld" : "st",
10326 dp ? 'd' : 's',
10327 dp ? "P" : "",
10328 integer_p ? "\t%@ int" : "");
10329 output_asm_insn (buff, ops);
10331 return "";
10334 /* Output a Neon quad-word load or store, or a load or store for
10335 larger structure modes. We could also support post-modify forms using
10336 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10337 yet.
10338 WARNING: The ordering of elements in memory is weird in big-endian mode,
10339 because we use VSTM instead of VST1, to make it easy to make vector stores
10340 via ARM registers write values in the same order as stores direct from Neon
10341 registers. For example, the byte ordering of a quadword vector with 16-bit
10342 elements like this:
10344 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10346 will be (with lowest address first, h = most-significant byte,
10347 l = least-significant byte of element):
10349 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10350 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10352 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10353 rN in the order:
10355 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10357 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10358 layout will result as if VSTM/VLDM were used. */
10360 const char *
10361 output_move_neon (rtx *operands)
10363 rtx reg, mem, addr, ops[2];
10364 int regno, load = REG_P (operands[0]);
10365 const char *template;
10366 char buff[50];
10367 enum machine_mode mode;
10369 reg = operands[!load];
10370 mem = operands[load];
10372 mode = GET_MODE (reg);
10374 gcc_assert (REG_P (reg));
10375 regno = REGNO (reg);
10376 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10377 || NEON_REGNO_OK_FOR_QUAD (regno));
10378 gcc_assert (VALID_NEON_DREG_MODE (mode)
10379 || VALID_NEON_QREG_MODE (mode)
10380 || VALID_NEON_STRUCT_MODE (mode));
10381 gcc_assert (MEM_P (mem));
10383 addr = XEXP (mem, 0);
10385 /* Strip off const from addresses like (const (plus (...))). */
10386 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10387 addr = XEXP (addr, 0);
10389 switch (GET_CODE (addr))
10391 case POST_INC:
10392 template = "v%smia%%?\t%%0!, %%h1";
10393 ops[0] = XEXP (addr, 0);
10394 ops[1] = reg;
10395 break;
10397 case POST_MODIFY:
10398 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10399 gcc_unreachable ();
10401 case LABEL_REF:
10402 case PLUS:
10404 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10405 int i;
10406 int overlap = -1;
10407 for (i = 0; i < nregs; i++)
10409 /* We're only using DImode here because it's a convenient size. */
10410 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10411 ops[1] = adjust_address (mem, SImode, 8 * i);
10412 if (reg_overlap_mentioned_p (ops[0], mem))
10414 gcc_assert (overlap == -1);
10415 overlap = i;
10417 else
10419 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10420 output_asm_insn (buff, ops);
10423 if (overlap != -1)
10425 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10426 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10427 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10428 output_asm_insn (buff, ops);
10431 return "";
10434 default:
10435 template = "v%smia%%?\t%%m0, %%h1";
10436 ops[0] = mem;
10437 ops[1] = reg;
10440 sprintf (buff, template, load ? "ld" : "st");
10441 output_asm_insn (buff, ops);
10443 return "";
10446 /* Output an ADD r, s, #n where n may be too big for one instruction.
10447 If adding zero to one register, output nothing. */
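/* For instance (an illustrative case, see output_multi_immediate below):
   n = 0x10004 cannot be encoded as a single ARM immediate, so it is split
   into two instructions, roughly
   add rd, rs, #4
   add rd, rd, #65536
   A negative n is handled the same way using sub. */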
10448 const char *
10449 output_add_immediate (rtx *operands)
10451 HOST_WIDE_INT n = INTVAL (operands[2]);
10453 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10455 if (n < 0)
10456 output_multi_immediate (operands,
10457 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10458 -n);
10459 else
10460 output_multi_immediate (operands,
10461 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10465 return "";
10468 /* Output a multiple immediate operation.
10469 OPERANDS is the vector of operands referred to in the output patterns.
10470 INSTR1 is the output pattern to use for the first constant.
10471 INSTR2 is the output pattern to use for subsequent constants.
10472 IMMED_OP is the index of the constant slot in OPERANDS.
10473 N is the constant value. */
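/* As a worked example (illustrative only): with N = 0x00ffff00 the loop
   below finds two byte-sized chunks, 0x0000ff00 and 0x00ff0000, so with
   INSTR1 = "add %0, %1, %2" and INSTR2 = "add %0, %0, %2" the output is
   add rd, rs, #65280
   add rd, rd, #16711680 */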
10474 static const char *
10475 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10476 int immed_op, HOST_WIDE_INT n)
10478 #if HOST_BITS_PER_WIDE_INT > 32
10479 n &= 0xffffffff;
10480 #endif
10482 if (n == 0)
10484 /* Quick and easy output. */
10485 operands[immed_op] = const0_rtx;
10486 output_asm_insn (instr1, operands);
10488 else
10490 int i;
10491 const char * instr = instr1;
10493 /* Note that n is never zero here (which would give no output). */
10494 for (i = 0; i < 32; i += 2)
10496 if (n & (3 << i))
10498 operands[immed_op] = GEN_INT (n & (255 << i));
10499 output_asm_insn (instr, operands);
10500 instr = instr2;
10501 i += 6;
10506 return "";
10509 /* Return the name of a shifter operation. */
10510 static const char *
10511 arm_shift_nmem(enum rtx_code code)
10513 switch (code)
10515 case ASHIFT:
10516 return ARM_LSL_NAME;
10518 case ASHIFTRT:
10519 return "asr";
10521 case LSHIFTRT:
10522 return "lsr";
10524 case ROTATERT:
10525 return "ror";
10527 default:
10528 abort();
10532 /* Return the appropriate ARM instruction for the operation code.
10533 The returned result should not be overwritten. OP is the rtx of the
10534 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10535 was shifted. */
10536 const char *
10537 arithmetic_instr (rtx op, int shift_first_arg)
10539 switch (GET_CODE (op))
10541 case PLUS:
10542 return "add";
10544 case MINUS:
10545 return shift_first_arg ? "rsb" : "sub";
10547 case IOR:
10548 return "orr";
10550 case XOR:
10551 return "eor";
10553 case AND:
10554 return "and";
10556 case ASHIFT:
10557 case ASHIFTRT:
10558 case LSHIFTRT:
10559 case ROTATERT:
10560 return arm_shift_nmem(GET_CODE(op));
10562 default:
10563 gcc_unreachable ();
10567 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10568 for the operation code. The returned result should not be overwritten.
10569 OP is the rtx code of the shift.
10570 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
10571 constant shift amount otherwise. */
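/* For example (illustrative): for (ashiftrt x (const_int 3)) this returns
   "asr" with *AMOUNTP == 3, and for (mult x (const_int 8)) it returns
   ARM_LSL_NAME with *AMOUNTP == 3, since multiplying by 8 is a left shift
   by log2 (8). */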
10572 static const char *
10573 shift_op (rtx op, HOST_WIDE_INT *amountp)
10575 const char * mnem;
10576 enum rtx_code code = GET_CODE (op);
10578 switch (GET_CODE (XEXP (op, 1)))
10580 case REG:
10581 case SUBREG:
10582 *amountp = -1;
10583 break;
10585 case CONST_INT:
10586 *amountp = INTVAL (XEXP (op, 1));
10587 break;
10589 default:
10590 gcc_unreachable ();
10593 switch (code)
10595 case ROTATE:
10596 gcc_assert (*amountp != -1);
10597 *amountp = 32 - *amountp;
10598 code = ROTATERT;
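/* For example, a left rotate by 8 is emitted as a right rotate by 24. */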
10600 /* Fall through. */
10602 case ASHIFT:
10603 case ASHIFTRT:
10604 case LSHIFTRT:
10605 case ROTATERT:
10606 mnem = arm_shift_nmem(code);
10607 break;
10609 case MULT:
10610 /* We never have to worry about the amount being other than a
10611 power of 2, since this case can never be reloaded from a reg. */
10612 gcc_assert (*amountp != -1);
10613 *amountp = int_log2 (*amountp);
10614 return ARM_LSL_NAME;
10616 default:
10617 gcc_unreachable ();
10620 if (*amountp != -1)
10622 /* This is not 100% correct, but follows from the desire to merge
10623 multiplication by a power of 2 with the recognizer for a
10624 shift. >=32 is not a valid shift for "lsl", so we must try and
10625 output a shift that produces the correct arithmetical result.
10626 Using lsr #32 is identical except for the fact that the carry bit
10627 is not set correctly if we set the flags; but we never use the
10628 carry bit from such an operation, so we can ignore that. */
10629 if (code == ROTATERT)
10630 /* Rotate is just modulo 32. */
10631 *amountp &= 31;
10632 else if (*amountp != (*amountp & 31))
10634 if (code == ASHIFT)
10635 mnem = "lsr";
10636 *amountp = 32;
10639 /* Shifts of 0 are no-ops. */
10640 if (*amountp == 0)
10641 return NULL;
10644 return mnem;
10647 /* Obtain the shift from the POWER of two. */
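/* For example, int_log2 (8) == 3; the loop below simply finds the lowest
   set bit of POWER. */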
10649 static HOST_WIDE_INT
10650 int_log2 (HOST_WIDE_INT power)
10652 HOST_WIDE_INT shift = 0;
10654 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10656 gcc_assert (shift <= 31);
10657 shift++;
10660 return shift;
10663 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10664 because /bin/as is horribly restrictive. The judgement about
10665 whether or not each character is 'printable' (and can be output as
10666 is) or not (and must be printed with an octal escape) must be made
10667 with reference to the *host* character set -- the situation is
10668 similar to that discussed in the comments above pp_c_char in
10669 c-pretty-print.c. */
10671 #define MAX_ASCII_LEN 51
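/* A sketch of the output (illustrative): the string "a\"b" followed by a
   byte of value 3 is emitted as
   .ascii "a\"b\003"
   and a fresh .ascii directive is started once roughly MAX_ASCII_LEN
   characters have been written to the current one. */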
10673 void
10674 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10676 int i;
10677 int len_so_far = 0;
10679 fputs ("\t.ascii\t\"", stream);
10681 for (i = 0; i < len; i++)
10683 int c = p[i];
10685 if (len_so_far >= MAX_ASCII_LEN)
10687 fputs ("\"\n\t.ascii\t\"", stream);
10688 len_so_far = 0;
10691 if (ISPRINT (c))
10693 if (c == '\\' || c == '\"')
10695 putc ('\\', stream);
10696 len_so_far++;
10698 putc (c, stream);
10699 len_so_far++;
10701 else
10703 fprintf (stream, "\\%03o", c);
10704 len_so_far += 4;
10708 fputs ("\"\n", stream);
10711 /* Compute the register save mask for registers 0 through 12
10712 inclusive. This code is used by arm_compute_save_reg_mask. */
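/* The result is a plain bitmask: bit N set means rN must be saved. For
   example (illustrative, ignoring the frame-pointer and PIC special cases
   handled below), a normal function that uses the call-saved registers r4
   and r7 gets a mask of (1 << 4) | (1 << 7) == 0x90. */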
10714 static unsigned long
10715 arm_compute_save_reg0_reg12_mask (void)
10717 unsigned long func_type = arm_current_func_type ();
10718 unsigned long save_reg_mask = 0;
10719 unsigned int reg;
10721 if (IS_INTERRUPT (func_type))
10723 unsigned int max_reg;
10724 /* Interrupt functions must not corrupt any registers,
10725 even call clobbered ones. If this is a leaf function
10726 we can just examine the registers used by the RTL, but
10727 otherwise we have to assume that whatever function is
10728 called might clobber anything, and so we have to save
10729 all the call-clobbered registers as well. */
10730 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10731 /* FIQ handlers have registers r8 - r12 banked, so
10732 we only need to check r0 - r7. Normal ISRs only
10733 bank r14 and r15, so we must check up to r12.
10734 r13 is the stack pointer which is always preserved,
10735 so we do not need to consider it here. */
10736 max_reg = 7;
10737 else
10738 max_reg = 12;
10740 for (reg = 0; reg <= max_reg; reg++)
10741 if (df_regs_ever_live_p (reg)
10742 || (! current_function_is_leaf && call_used_regs[reg]))
10743 save_reg_mask |= (1 << reg);
10745 /* Also save the pic base register if necessary. */
10746 if (flag_pic
10747 && !TARGET_SINGLE_PIC_BASE
10748 && arm_pic_register != INVALID_REGNUM
10749 && crtl->uses_pic_offset_table)
10750 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10752 else
10754 /* In the normal case we only need to save those registers
10755 which are call saved and which are used by this function. */
10756 for (reg = 0; reg <= 11; reg++)
10757 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10758 save_reg_mask |= (1 << reg);
10760 /* Handle the frame pointer as a special case. */
10761 if (frame_pointer_needed)
10762 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10764 /* If we aren't loading the PIC register,
10765 don't stack it even though it may be live. */
10766 if (flag_pic
10767 && !TARGET_SINGLE_PIC_BASE
10768 && arm_pic_register != INVALID_REGNUM
10769 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10770 || crtl->uses_pic_offset_table))
10771 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10773 /* The prologue will copy SP into R0, so save it. */
10774 if (IS_STACKALIGN (func_type))
10775 save_reg_mask |= 1;
10778 /* Save registers so the exception handler can modify them. */
10779 if (crtl->calls_eh_return)
10781 unsigned int i;
10783 for (i = 0; ; i++)
10785 reg = EH_RETURN_DATA_REGNO (i);
10786 if (reg == INVALID_REGNUM)
10787 break;
10788 save_reg_mask |= 1 << reg;
10792 return save_reg_mask;
10796 /* Compute a bit mask of which registers need to be
10797 saved on the stack for the current function.
10798 This is used by arm_get_frame_offsets, which may add extra registers. */
10800 static unsigned long
10801 arm_compute_save_reg_mask (void)
10803 unsigned int save_reg_mask = 0;
10804 unsigned long func_type = arm_current_func_type ();
10805 unsigned int reg;
10807 if (IS_NAKED (func_type))
10808 /* This should never really happen. */
10809 return 0;
10811 /* If we are creating a stack frame, then we must save the frame pointer,
10812 IP (which will hold the old stack pointer), LR and the PC. */
10813 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
10814 save_reg_mask |=
10815 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10816 | (1 << IP_REGNUM)
10817 | (1 << LR_REGNUM)
10818 | (1 << PC_REGNUM);
10820 /* Volatile functions do not return, so there
10821 is no need to save any other registers. */
10822 if (IS_VOLATILE (func_type))
10823 return save_reg_mask;
10825 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10827 /* Decide if we need to save the link register.
10828 Interrupt routines have their own banked link register,
10829 so they never need to save it.
10830 Otherwise if we do not use the link register we do not need to save
10831 it. If we are pushing other registers onto the stack however, we
10832 can save an instruction in the epilogue by pushing the link register
10833 now and then popping it back into the PC. This incurs extra memory
10834 accesses though, so we only do it when optimizing for size, and only
10835 if we know that we will not need a fancy return sequence. */
10836 if (df_regs_ever_live_p (LR_REGNUM)
10837 || (save_reg_mask
10838 && optimize_size
10839 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10840 && !crtl->calls_eh_return))
10841 save_reg_mask |= 1 << LR_REGNUM;
10843 if (cfun->machine->lr_save_eliminated)
10844 save_reg_mask &= ~ (1 << LR_REGNUM);
10846 if (TARGET_REALLY_IWMMXT
10847 && ((bit_count (save_reg_mask)
10848 + ARM_NUM_INTS (crtl->args.pretend_args_size)) % 2) != 0)
10850 /* The total number of registers that are going to be pushed
10851 onto the stack is odd. We need to ensure that the stack
10852 is 64-bit aligned before we start to save iWMMXt registers,
10853 and also before we start to create locals. (A local variable
10854 might be a double or long long which we will load/store using
10855 an iWMMXt instruction). Therefore we need to push another
10856 ARM register, so that the stack will be 64-bit aligned. We
10857 try to avoid using the arg registers (r0 - r3) as they might be
10858 used to pass values in a tail call. */
10859 for (reg = 4; reg <= 12; reg++)
10860 if ((save_reg_mask & (1 << reg)) == 0)
10861 break;
10863 if (reg <= 12)
10864 save_reg_mask |= (1 << reg);
10865 else
10867 cfun->machine->sibcall_blocked = 1;
10868 save_reg_mask |= (1 << 3);
10872 /* We may need to push an additional register for use initializing the
10873 PIC base register. */
10874 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10875 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10877 reg = thumb_find_work_register (1 << 4);
10878 if (!call_used_regs[reg])
10879 save_reg_mask |= (1 << reg);
10882 return save_reg_mask;
10886 /* Compute a bit mask of which registers need to be
10887 saved on the stack for the current function. */
10888 static unsigned long
10889 thumb1_compute_save_reg_mask (void)
10891 unsigned long mask;
10892 unsigned reg;
10894 mask = 0;
10895 for (reg = 0; reg < 12; reg ++)
10896 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10897 mask |= 1 << reg;
10899 if (flag_pic
10900 && !TARGET_SINGLE_PIC_BASE
10901 && arm_pic_register != INVALID_REGNUM
10902 && crtl->uses_pic_offset_table)
10903 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10905 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10906 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10907 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10909 /* LR will also be pushed if any lo regs are pushed. */
10910 if (mask & 0xff || thumb_force_lr_save ())
10911 mask |= (1 << LR_REGNUM);
10913 /* Make sure we have a low work register if we need one.
10914 We will need one if we are going to push a high register,
10915 but we are not currently intending to push a low register. */
10916 if ((mask & 0xff) == 0
10917 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10919 /* Use thumb_find_work_register to choose which register
10920 we will use. If the register is live then we will
10921 have to push it. Use LAST_LO_REGNUM as our fallback
10922 choice for the register to select. */
10923 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10924 /* Make sure the register returned by thumb_find_work_register is
10925 not part of the return value. */
10926 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
10927 reg = LAST_LO_REGNUM;
10929 if (! call_used_regs[reg])
10930 mask |= 1 << reg;
10933 return mask;
10937 /* Return the number of bytes required to save VFP registers. */
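/* For example (a sketch of the logic below): a contiguous run of four live
   call-saved double registers, say d8-d11, contributes 4 * 8 = 32 bytes;
   a run of exactly two registers on a core without arm_arch6 is padded to
   three (24 bytes) to work around the ARM10 VFPr1 bug handled below. */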
10938 static int
10939 arm_get_vfp_saved_size (void)
10941 unsigned int regno;
10942 int count;
10943 int saved;
10945 saved = 0;
10946 /* Space for saved VFP registers. */
10947 if (TARGET_HARD_FLOAT && TARGET_VFP)
10949 count = 0;
10950 for (regno = FIRST_VFP_REGNUM;
10951 regno < LAST_VFP_REGNUM;
10952 regno += 2)
10954 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10955 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10957 if (count > 0)
10959 /* Work around the ARM10 VFPr1 bug. */
10960 if (count == 2 && !arm_arch6)
10961 count++;
10962 saved += count * 8;
10964 count = 0;
10966 else
10967 count++;
10969 if (count > 0)
10971 if (count == 2 && !arm_arch6)
10972 count++;
10973 saved += count * 8;
10976 return saved;
10980 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10981 everything bar the final return instruction. */
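/* For instance (a sketch, assuming a plain ARM-mode function that saved
   only r4 and lr and needs no special exit sequence): the code below emits
   a single
   ldmfd sp!, {r4, pc}
   loading the return address straight into the PC, so no separate return
   instruction is required. */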
10982 const char *
10983 output_return_instruction (rtx operand, int really_return, int reverse)
10985 char conditional[10];
10986 char instr[100];
10987 unsigned reg;
10988 unsigned long live_regs_mask;
10989 unsigned long func_type;
10990 arm_stack_offsets *offsets;
10992 func_type = arm_current_func_type ();
10994 if (IS_NAKED (func_type))
10995 return "";
10997 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10999 /* If this function was declared non-returning, and we have
11000 found a tail call, then we have to trust that the called
11001 function won't return. */
11002 if (really_return)
11004 rtx ops[2];
11006 /* Otherwise, trap an attempted return by aborting. */
11007 ops[0] = operand;
11008 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
11009 : "abort");
11010 assemble_external_libcall (ops[1]);
11011 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11014 return "";
11017 gcc_assert (!cfun->calls_alloca || really_return);
11019 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11021 return_used_this_function = 1;
11023 offsets = arm_get_frame_offsets ();
11024 live_regs_mask = offsets->saved_regs_mask;
11026 if (live_regs_mask)
11028 const char * return_reg;
11030 /* If we do not have any special requirements for function exit
11031 (e.g. interworking) then we can load the return address
11032 directly into the PC. Otherwise we must load it into LR. */
11033 if (really_return
11034 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11035 return_reg = reg_names[PC_REGNUM];
11036 else
11037 return_reg = reg_names[LR_REGNUM];
11039 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11041 /* There are three possible reasons for the IP register
11042 being saved. 1) a stack frame was created, in which case
11043 IP contains the old stack pointer, or 2) an ISR routine
11044 corrupted it, or 3) it was saved to align the stack on
11045 iWMMXt. In case 1, restore IP into SP, otherwise just
11046 restore IP. */
11047 if (frame_pointer_needed)
11049 live_regs_mask &= ~ (1 << IP_REGNUM);
11050 live_regs_mask |= (1 << SP_REGNUM);
11052 else
11053 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11056 /* On some ARM architectures it is faster to use LDR rather than
11057 LDM to load a single register. On other architectures, the
11058 cost is the same. In 26 bit mode, or for exception handlers,
11059 we have to use LDM to load the PC so that the CPSR is also
11060 restored. */
11061 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11062 if (live_regs_mask == (1U << reg))
11063 break;
11065 if (reg <= LAST_ARM_REGNUM
11066 && (reg != LR_REGNUM
11067 || ! really_return
11068 || ! IS_INTERRUPT (func_type)))
11070 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11071 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11073 else
11075 char *p;
11076 int first = 1;
11078 /* Generate the load multiple instruction to restore the
11079 registers. Note we can get here, even if
11080 frame_pointer_needed is true, but only if sp already
11081 points to the base of the saved core registers. */
11082 if (live_regs_mask & (1 << SP_REGNUM))
11084 unsigned HOST_WIDE_INT stack_adjust;
11086 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11087 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11089 if (stack_adjust && arm_arch5 && TARGET_ARM)
11090 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11091 else
11093 /* If we can't use ldmib (SA110 bug),
11094 then try to pop r3 instead. */
11095 if (stack_adjust)
11096 live_regs_mask |= 1 << 3;
11097 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11100 else
11101 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11103 p = instr + strlen (instr);
11105 for (reg = 0; reg <= SP_REGNUM; reg++)
11106 if (live_regs_mask & (1 << reg))
11108 int l = strlen (reg_names[reg]);
11110 if (first)
11111 first = 0;
11112 else
11114 memcpy (p, ", ", 2);
11115 p += 2;
11118 memcpy (p, "%|", 2);
11119 memcpy (p + 2, reg_names[reg], l);
11120 p += l + 2;
11123 if (live_regs_mask & (1 << LR_REGNUM))
11125 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11126 /* If returning from an interrupt, restore the CPSR. */
11127 if (IS_INTERRUPT (func_type))
11128 strcat (p, "^");
11130 else
11131 strcpy (p, "}");
11134 output_asm_insn (instr, & operand);
11136 /* See if we need to generate an extra instruction to
11137 perform the actual function return. */
11138 if (really_return
11139 && func_type != ARM_FT_INTERWORKED
11140 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11142 /* The return has already been handled
11143 by loading the LR into the PC. */
11144 really_return = 0;
11148 if (really_return)
11150 switch ((int) ARM_FUNC_TYPE (func_type))
11152 case ARM_FT_ISR:
11153 case ARM_FT_FIQ:
11154 /* ??? This is wrong for unified assembly syntax. */
11155 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11156 break;
11158 case ARM_FT_INTERWORKED:
11159 sprintf (instr, "bx%s\t%%|lr", conditional);
11160 break;
11162 case ARM_FT_EXCEPTION:
11163 /* ??? This is wrong for unified assembly syntax. */
11164 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11165 break;
11167 default:
11168 /* Use bx if it's available. */
11169 if (arm_arch5 || arm_arch4t)
11170 sprintf (instr, "bx%s\t%%|lr", conditional);
11171 else
11172 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11173 break;
11176 output_asm_insn (instr, & operand);
11179 return "";
11182 /* Write the function name into the code section, directly preceding
11183 the function prologue.
11185 Code will be output similar to this:
11187 .ascii "arm_poke_function_name", 0
11188 .align
11190 .word 0xff000000 + (t1 - t0)
11191 arm_poke_function_name
11192 mov ip, sp
11193 stmfd sp!, {fp, ip, lr, pc}
11194 sub fp, ip, #4
11196 When performing a stack backtrace, code can inspect the value
11197 of 'pc' stored at 'fp' + 0. If the trace function then looks
11198 at location pc - 12 and the top 8 bits are set, then we know
11199 that there is a function name embedded immediately preceding this
11200 location, whose length is ((pc[-3]) & 0x00ffffff).
11202 We assume that pc is declared as a pointer to an unsigned long.
11204 It is of no benefit to output the function name if we are assembling
11205 a leaf function. These function types will not contain a stack
11206 backtrace structure, therefore it is not possible to determine the
11207 function name. */
11208 void
11209 arm_poke_function_name (FILE *stream, const char *name)
11211 unsigned long alignlength;
11212 unsigned long length;
11213 rtx x;
11215 length = strlen (name) + 1;
11216 alignlength = ROUND_UP_WORD (length);
11218 ASM_OUTPUT_ASCII (stream, name, length);
11219 ASM_OUTPUT_ALIGN (stream, 2);
11220 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11221 assemble_aligned_integer (UNITS_PER_WORD, x);
11224 /* Place some comments into the assembler stream
11225 describing the current function. */
11226 static void
11227 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11229 unsigned long func_type;
11231 if (TARGET_THUMB1)
11233 thumb1_output_function_prologue (f, frame_size);
11234 return;
11237 /* Sanity check. */
11238 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11240 func_type = arm_current_func_type ();
11242 switch ((int) ARM_FUNC_TYPE (func_type))
11244 default:
11245 case ARM_FT_NORMAL:
11246 break;
11247 case ARM_FT_INTERWORKED:
11248 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11249 break;
11250 case ARM_FT_ISR:
11251 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11252 break;
11253 case ARM_FT_FIQ:
11254 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11255 break;
11256 case ARM_FT_EXCEPTION:
11257 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11258 break;
11261 if (IS_NAKED (func_type))
11262 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11264 if (IS_VOLATILE (func_type))
11265 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11267 if (IS_NESTED (func_type))
11268 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11269 if (IS_STACKALIGN (func_type))
11270 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11272 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11273 crtl->args.size,
11274 crtl->args.pretend_args_size, frame_size);
11276 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11277 frame_pointer_needed,
11278 cfun->machine->uses_anonymous_args);
11280 if (cfun->machine->lr_save_eliminated)
11281 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11283 if (crtl->calls_eh_return)
11284 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11286 return_used_this_function = 0;
11289 const char *
11290 arm_output_epilogue (rtx sibling)
11292 int reg;
11293 unsigned long saved_regs_mask;
11294 unsigned long func_type;
11295 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11296 frame that is $fp + 4 for a non-variadic function. */
11297 int floats_offset = 0;
11298 rtx operands[3];
11299 FILE * f = asm_out_file;
11300 unsigned int lrm_count = 0;
11301 int really_return = (sibling == NULL);
11302 int start_reg;
11303 arm_stack_offsets *offsets;
11305 /* If we have already generated the return instruction
11306 then it is futile to generate anything else. */
11307 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11308 return "";
11310 func_type = arm_current_func_type ();
11312 if (IS_NAKED (func_type))
11313 /* Naked functions don't have epilogues. */
11314 return "";
11316 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11318 rtx op;
11320 /* A volatile function should never return. Call abort. */
11321 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11322 assemble_external_libcall (op);
11323 output_asm_insn ("bl\t%a0", &op);
11325 return "";
11328 /* If we are throwing an exception, then we really must be doing a
11329 return, so we can't tail-call. */
11330 gcc_assert (!crtl->calls_eh_return || really_return);
11332 offsets = arm_get_frame_offsets ();
11333 saved_regs_mask = offsets->saved_regs_mask;
11335 if (TARGET_IWMMXT)
11336 lrm_count = bit_count (saved_regs_mask);
11338 floats_offset = offsets->saved_args;
11339 /* Compute how far away the floats will be. */
11340 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11341 if (saved_regs_mask & (1 << reg))
11342 floats_offset += 4;
11344 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11346 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11347 int vfp_offset = offsets->frame;
11349 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11351 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11352 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11354 floats_offset += 12;
11355 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11356 reg, FP_REGNUM, floats_offset - vfp_offset);
11359 else
11361 start_reg = LAST_FPA_REGNUM;
11363 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11365 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11367 floats_offset += 12;
11369 /* We can't unstack more than four registers at once. */
11370 if (start_reg - reg == 3)
11372 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11373 reg, FP_REGNUM, floats_offset - vfp_offset);
11374 start_reg = reg - 1;
11377 else
11379 if (reg != start_reg)
11380 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11381 reg + 1, start_reg - reg,
11382 FP_REGNUM, floats_offset - vfp_offset);
11383 start_reg = reg - 1;
11387 /* Just in case the last register checked also needs unstacking. */
11388 if (reg != start_reg)
11389 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11390 reg + 1, start_reg - reg,
11391 FP_REGNUM, floats_offset - vfp_offset);
11394 if (TARGET_HARD_FLOAT && TARGET_VFP)
11396 int saved_size;
11398 /* The fldmd insns do not have base+offset addressing
11399 modes, so we use IP to hold the address. */
11400 saved_size = arm_get_vfp_saved_size ();
11402 if (saved_size > 0)
11404 floats_offset += saved_size;
11405 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11406 FP_REGNUM, floats_offset - vfp_offset);
11408 start_reg = FIRST_VFP_REGNUM;
11409 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11411 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11412 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11414 if (start_reg != reg)
11415 vfp_output_fldmd (f, IP_REGNUM,
11416 (start_reg - FIRST_VFP_REGNUM) / 2,
11417 (reg - start_reg) / 2);
11418 start_reg = reg + 2;
11421 if (start_reg != reg)
11422 vfp_output_fldmd (f, IP_REGNUM,
11423 (start_reg - FIRST_VFP_REGNUM) / 2,
11424 (reg - start_reg) / 2);
11427 if (TARGET_IWMMXT)
11429 /* The frame pointer is guaranteed to be non-double-word aligned.
11430 This is because it is set to (old_stack_pointer - 4) and the
11431 old_stack_pointer was double word aligned. Thus the offset to
11432 the iWMMXt registers to be loaded must also be non-double-word
11433 sized, so that the resultant address *is* double-word aligned.
11434 We can ignore floats_offset since that was already included in
11435 the live_regs_mask. */
11436 lrm_count += (lrm_count % 2 ? 2 : 1);
11438 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11439 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11441 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11442 reg, FP_REGNUM, lrm_count * 4);
11443 lrm_count += 2;
11447 /* saved_regs_mask should contain the IP, which at the time of stack
11448 frame generation actually contains the old stack pointer. So a
11449 quick way to unwind the stack is just pop the IP register directly
11450 into the stack pointer. */
11451 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11452 saved_regs_mask &= ~ (1 << IP_REGNUM);
11453 saved_regs_mask |= (1 << SP_REGNUM);
11455 /* There are two registers left in saved_regs_mask - LR and PC. We
11456 only need to restore the LR register (the return address), but to
11457 save time we can load it directly into the PC, unless we need a
11458 special function exit sequence, or we are not really returning. */
11459 if (really_return
11460 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11461 && !crtl->calls_eh_return)
11462 /* Delete the LR from the register mask, so that the LR on
11463 the stack is loaded into the PC in the register mask. */
11464 saved_regs_mask &= ~ (1 << LR_REGNUM);
11465 else
11466 saved_regs_mask &= ~ (1 << PC_REGNUM);
11468 /* We must use SP as the base register, because SP is one of the
11469 registers being restored. If an interrupt or page fault
11470 happens in the ldm instruction, the SP might or might not
11471 have been restored. That would be bad, as then SP will no
11472 longer indicate the safe area of stack, and we can get stack
11473 corruption. Using SP as the base register means that it will
11474 be reset correctly to the original value, should an interrupt
11475 occur. If the stack pointer already points at the right
11476 place, then omit the subtraction. */
11477 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11478 || cfun->calls_alloca)
11479 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11480 4 * bit_count (saved_regs_mask));
11481 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11483 if (IS_INTERRUPT (func_type))
11484 /* Interrupt handlers will have pushed the
11485 IP onto the stack, so restore it now. */
11486 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11488 else
11490 /* This branch is executed for ARM mode (non-apcs frames) and
11491 Thumb-2 mode. Frame layout is essentially the same for those
11492 cases, except that in ARM mode frame pointer points to the
11493 first saved register, while in Thumb-2 mode the frame pointer points
11494 to the last saved register.
11496 It is possible to make the frame pointer point to the last saved
11497 register in both cases, and remove some conditionals below.
11498 That means that fp setup in prologue would be just "mov fp, sp"
11499 and sp restore in epilogue would be just "mov sp, fp", whereas
11500 now we have to use add/sub in those cases. However, the value
11501 of that would be marginal, as both mov and add/sub are 32-bit
11502 in ARM mode, and it would require extra conditionals
11503 in arm_expand_prologue to distinguish the ARM-apcs-frame case
11504 (where the frame pointer is required to point at the first register)
11505 from the ARM-non-apcs-frame case. Therefore, such a change is postponed
11506 until a real need arises. */
11507 HOST_WIDE_INT amount;
11508 int rfe;
11509 /* Restore stack pointer if necessary. */
11510 if (TARGET_ARM && frame_pointer_needed)
11512 operands[0] = stack_pointer_rtx;
11513 operands[1] = hard_frame_pointer_rtx;
11515 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
11516 output_add_immediate (operands);
11518 else
11520 if (frame_pointer_needed)
11522 /* For Thumb-2 restore sp from the frame pointer.
11523 Operand restrictions mean we have to increment FP, then copy
11524 to SP. */
11525 amount = offsets->locals_base - offsets->saved_regs;
11526 operands[0] = hard_frame_pointer_rtx;
11528 else
11530 unsigned long count;
11531 operands[0] = stack_pointer_rtx;
11532 amount = offsets->outgoing_args - offsets->saved_regs;
11533 /* pop call clobbered registers if it avoids a
11534 separate stack adjustment. */
11535 count = offsets->saved_regs - offsets->saved_args;
11536 if (optimize_size
11537 && count != 0
11538 && !crtl->calls_eh_return
11539 && bit_count(saved_regs_mask) * 4 == count
11540 && !IS_INTERRUPT (func_type)
11541 && !crtl->tail_call_emit)
11543 unsigned long mask;
11544 mask = (1 << (arm_size_return_regs() / 4)) - 1;
11545 mask ^= 0xf;
11546 mask &= ~saved_regs_mask;
11547 reg = 0;
11548 while (bit_count (mask) * 4 > amount)
11550 while ((mask & (1 << reg)) == 0)
11551 reg++;
11552 mask &= ~(1 << reg);
11554 if (bit_count (mask) * 4 == amount) {
11555 amount = 0;
11556 saved_regs_mask |= mask;
11561 if (amount)
11563 operands[1] = operands[0];
11564 operands[2] = GEN_INT (amount);
11565 output_add_immediate (operands);
11567 if (frame_pointer_needed)
11568 asm_fprintf (f, "\tmov\t%r, %r\n",
11569 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11572 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11574 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11575 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11576 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11577 reg, SP_REGNUM);
11579 else
11581 start_reg = FIRST_FPA_REGNUM;
11583 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11585 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11587 if (reg - start_reg == 3)
11589 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11590 start_reg, SP_REGNUM);
11591 start_reg = reg + 1;
11594 else
11596 if (reg != start_reg)
11597 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11598 start_reg, reg - start_reg,
11599 SP_REGNUM);
11601 start_reg = reg + 1;
11605 /* Just in case the last register checked also needs unstacking. */
11606 if (reg != start_reg)
11607 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11608 start_reg, reg - start_reg, SP_REGNUM);
11611 if (TARGET_HARD_FLOAT && TARGET_VFP)
11613 start_reg = FIRST_VFP_REGNUM;
11614 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11616 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11617 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11619 if (start_reg != reg)
11620 vfp_output_fldmd (f, SP_REGNUM,
11621 (start_reg - FIRST_VFP_REGNUM) / 2,
11622 (reg - start_reg) / 2);
11623 start_reg = reg + 2;
11626 if (start_reg != reg)
11627 vfp_output_fldmd (f, SP_REGNUM,
11628 (start_reg - FIRST_VFP_REGNUM) / 2,
11629 (reg - start_reg) / 2);
11631 if (TARGET_IWMMXT)
11632 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11633 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11634 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11636 /* If we can, restore the LR into the PC. */
11637 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11638 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11639 && !IS_STACKALIGN (func_type)
11640 && really_return
11641 && crtl->args.pretend_args_size == 0
11642 && saved_regs_mask & (1 << LR_REGNUM)
11643 && !crtl->calls_eh_return)
11645 saved_regs_mask &= ~ (1 << LR_REGNUM);
11646 saved_regs_mask |= (1 << PC_REGNUM);
11647 rfe = IS_INTERRUPT (func_type);
11649 else
11650 rfe = 0;
11652 /* Load the registers off the stack. If we only have one register
11653 to load use the LDR instruction - it is faster. For Thumb-2
11654 always use pop and the assembler will pick the best instruction. */
11655 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11656 && !IS_INTERRUPT(func_type))
11658 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11660 else if (saved_regs_mask)
11662 if (saved_regs_mask & (1 << SP_REGNUM))
11663 /* Note - write back to the stack register is not enabled
11664 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11665 in the list of registers and if we add writeback the
11666 instruction becomes UNPREDICTABLE. */
11667 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11668 rfe);
11669 else if (TARGET_ARM)
11670 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11671 rfe);
11672 else
11673 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11676 if (crtl->args.pretend_args_size)
11678 /* Unwind the pre-pushed regs. */
11679 operands[0] = operands[1] = stack_pointer_rtx;
11680 operands[2] = GEN_INT (crtl->args.pretend_args_size);
11681 output_add_immediate (operands);
11685 /* We may have already restored PC directly from the stack. */
11686 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11687 return "";
11689 /* Stack adjustment for exception handler. */
11690 if (crtl->calls_eh_return)
11691 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11692 ARM_EH_STACKADJ_REGNUM);
11694 /* Generate the return instruction. */
11695 switch ((int) ARM_FUNC_TYPE (func_type))
11697 case ARM_FT_ISR:
11698 case ARM_FT_FIQ:
11699 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11700 break;
11702 case ARM_FT_EXCEPTION:
11703 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11704 break;
11706 case ARM_FT_INTERWORKED:
11707 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11708 break;
11710 default:
11711 if (IS_STACKALIGN (func_type))
11713 /* See comment in arm_expand_prologue. */
11714 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11716 if (arm_arch5 || arm_arch4t)
11717 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11718 else
11719 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11720 break;
11723 return "";
11726 static void
11727 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11728 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11730 arm_stack_offsets *offsets;
11732 if (TARGET_THUMB1)
11734 int regno;
11736 /* Emit any call-via-reg trampolines that are needed for v4t support
11737 of call_reg and call_value_reg type insns. */
11738 for (regno = 0; regno < LR_REGNUM; regno++)
11740 rtx label = cfun->machine->call_via[regno];
11742 if (label != NULL)
11744 switch_to_section (function_section (current_function_decl));
11745 targetm.asm_out.internal_label (asm_out_file, "L",
11746 CODE_LABEL_NUMBER (label));
11747 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11751 /* ??? Probably not safe to set this here, since it assumes that a
11752 function will be emitted as assembly immediately after we generate
11753 RTL for it. This does not happen for inline functions. */
11754 return_used_this_function = 0;
11756 else /* TARGET_32BIT */
11758 /* We need to take into account any stack-frame rounding. */
11759 offsets = arm_get_frame_offsets ();
11761 gcc_assert (!use_return_insn (FALSE, NULL)
11762 || !return_used_this_function
11763 || offsets->saved_regs == offsets->outgoing_args
11764 || frame_pointer_needed);
11766 /* Reset the ARM-specific per-function variables. */
11767 after_arm_reorg = 0;
11771 /* Generate and emit an insn that we will recognize as a push_multi.
11772 Unfortunately, since this insn does not reflect very well the actual
11773 semantics of the operation, we need to annotate the insn for the benefit
11774 of DWARF2 frame unwind information. */
11775 static rtx
11776 emit_multi_reg_push (unsigned long mask)
11778 int num_regs = 0;
11779 int num_dwarf_regs;
11780 int i, j;
11781 rtx par;
11782 rtx dwarf;
11783 int dwarf_par_index;
11784 rtx tmp, reg;
11786 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11787 if (mask & (1 << i))
11788 num_regs++;
11790 gcc_assert (num_regs && num_regs <= 16);
11792 /* We don't record the PC in the dwarf frame information. */
11793 num_dwarf_regs = num_regs;
11794 if (mask & (1 << PC_REGNUM))
11795 num_dwarf_regs--;
11797 /* For the body of the insn we are going to generate an UNSPEC in
11798 parallel with several USEs. This allows the insn to be recognized
11799 by the push_multi pattern in the arm.md file. The insn looks
11800 something like this:
11802 (parallel [
11803 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11804 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11805 (use (reg:SI 11 fp))
11806 (use (reg:SI 12 ip))
11807 (use (reg:SI 14 lr))
11808 (use (reg:SI 15 pc))
11811 For the frame note however, we try to be more explicit and actually
11812 show each register being stored into the stack frame, plus a (single)
11813 decrement of the stack pointer. We do it this way in order to be
11814 friendly to the stack unwinding code, which only wants to see a single
11815 stack decrement per instruction. The RTL we generate for the note looks
11816 something like this:
11818 (sequence [
11819 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11820 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11821 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11822 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11823 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11826 This sequence is used both by the code to support stack unwinding for
11827 exceptions handlers and the code to generate dwarf2 frame debugging. */
11829 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11830 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11831 dwarf_par_index = 1;
11833 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11835 if (mask & (1 << i))
11837 reg = gen_rtx_REG (SImode, i);
11839 XVECEXP (par, 0, 0)
11840 = gen_rtx_SET (VOIDmode,
11841 gen_frame_mem (BLKmode,
11842 gen_rtx_PRE_DEC (BLKmode,
11843 stack_pointer_rtx)),
11844 gen_rtx_UNSPEC (BLKmode,
11845 gen_rtvec (1, reg),
11846 UNSPEC_PUSH_MULT));
11848 if (i != PC_REGNUM)
11850 tmp = gen_rtx_SET (VOIDmode,
11851 gen_frame_mem (SImode, stack_pointer_rtx),
11852 reg);
11853 RTX_FRAME_RELATED_P (tmp) = 1;
11854 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11855 dwarf_par_index++;
11858 break;
11862 for (j = 1, i++; j < num_regs; i++)
11864 if (mask & (1 << i))
11866 reg = gen_rtx_REG (SImode, i);
11868 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11870 if (i != PC_REGNUM)
11873 = gen_rtx_SET (VOIDmode,
11874 gen_frame_mem (SImode,
11875 plus_constant (stack_pointer_rtx,
11876 4 * j)),
11877 reg);
11878 RTX_FRAME_RELATED_P (tmp) = 1;
11879 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11882 j++;
11886 par = emit_insn (par);
11888 tmp = gen_rtx_SET (VOIDmode,
11889 stack_pointer_rtx,
11890 plus_constant (stack_pointer_rtx, -4 * num_regs));
11891 RTX_FRAME_RELATED_P (tmp) = 1;
11892 XVECEXP (dwarf, 0, 0) = tmp;
11894 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11895 REG_NOTES (par));
11896 return par;
11899 /* Calculate the size of the return value that is passed in registers. */
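/* For example (illustrative): a function returning "long long" has a
   DImode return rtx, so this yields GET_MODE_SIZE (DImode) == 8. */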
11900 static unsigned
11901 arm_size_return_regs (void)
11903 enum machine_mode mode;
11905 if (crtl->return_rtx != 0)
11906 mode = GET_MODE (crtl->return_rtx);
11907 else
11908 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11910 return GET_MODE_SIZE (mode);
11913 static rtx
11914 emit_sfm (int base_reg, int count)
11916 rtx par;
11917 rtx dwarf;
11918 rtx tmp, reg;
11919 int i;
11921 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11922 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11924 reg = gen_rtx_REG (XFmode, base_reg++);
11926 XVECEXP (par, 0, 0)
11927 = gen_rtx_SET (VOIDmode,
11928 gen_frame_mem (BLKmode,
11929 gen_rtx_PRE_DEC (BLKmode,
11930 stack_pointer_rtx)),
11931 gen_rtx_UNSPEC (BLKmode,
11932 gen_rtvec (1, reg),
11933 UNSPEC_PUSH_MULT));
11934 tmp = gen_rtx_SET (VOIDmode,
11935 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11936 RTX_FRAME_RELATED_P (tmp) = 1;
11937 XVECEXP (dwarf, 0, 1) = tmp;
11939 for (i = 1; i < count; i++)
11941 reg = gen_rtx_REG (XFmode, base_reg++);
11942 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11944 tmp = gen_rtx_SET (VOIDmode,
11945 gen_frame_mem (XFmode,
11946 plus_constant (stack_pointer_rtx,
11947 i * 12)),
11948 reg);
11949 RTX_FRAME_RELATED_P (tmp) = 1;
11950 XVECEXP (dwarf, 0, i + 1) = tmp;
11953 tmp = gen_rtx_SET (VOIDmode,
11954 stack_pointer_rtx,
11955 plus_constant (stack_pointer_rtx, -12 * count));
11957 RTX_FRAME_RELATED_P (tmp) = 1;
11958 XVECEXP (dwarf, 0, 0) = tmp;
11960 par = emit_insn (par);
11961 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11962 REG_NOTES (par));
11963 return par;
11967 /* Return true if the current function needs to save/restore LR. */
11969 static bool
11970 thumb_force_lr_save (void)
11972 return !cfun->machine->lr_save_eliminated
11973 && (!leaf_function_p ()
11974 || thumb_far_jump_used_p ()
11975 || df_regs_ever_live_p (LR_REGNUM));
11979 /* Compute the distance from register FROM to register TO.
11980 These can be the arg pointer (26), the soft frame pointer (25),
11981 the stack pointer (13) or the hard frame pointer (11).
11982 In thumb mode r7 is used as the soft frame pointer, if needed.
11983 Typical stack layout looks like this:
11985 old stack pointer -> | |
11986 ----
11987 | | \
11988 | | saved arguments for
11989 | | vararg functions
11990 | | /
11992 hard FP & arg pointer -> | | \
11993 | | stack
11994 | | frame
11995 | | /
11997 | | \
11998 | | call saved
11999 | | registers
12000 soft frame pointer -> | | /
12002 | | \
12003 | | local
12004 | | variables
12005 locals base pointer -> | | /
12007 | | \
12008 | | outgoing
12009 | | arguments
12010 current stack pointer -> | | /
12013 For a given function some or all of these stack components
12014 may not be needed, giving rise to the possibility of
12015 eliminating some of the registers.
12017 The values returned by this function must reflect the behavior
12018 of arm_expand_prologue() and arm_compute_save_reg_mask().
12020 The sign of the number returned reflects the direction of stack
12021 growth, so the values are positive for all eliminations except
12022 from the soft frame pointer to the hard frame pointer.
12024 SFP may point just inside the local variables block to ensure correct
12025 alignment. */
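/* For example (illustrative figures only): a function with no pretend args,
   16 bytes of saved core registers, 8 bytes of locals, no outgoing arguments
   and no caller-interworking slot has saved_args == 0, saved_regs == 16,
   soft_frame == 16, locals_base == 24 and outgoing_args == 24, so the
   ARG_POINTER to STACK_POINTER elimination below yields 24 - (0 + 4) == 20.  */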
12028 /* Calculate stack offsets. These are used to calculate register elimination
12029 offsets and in prologue/epilogue code. Also calculates which registers
12030 should be saved. */
12032 static arm_stack_offsets *
12033 arm_get_frame_offsets (void)
12035 struct arm_stack_offsets *offsets;
12036 unsigned long func_type;
12037 int leaf;
12038 int saved;
12039 int core_saved;
12040 HOST_WIDE_INT frame_size;
12041 int i;
12043 offsets = &cfun->machine->stack_offsets;
12045 /* We need to know if we are a leaf function. Unfortunately, it
12046 is possible to be called after start_sequence has been called,
12047 which causes get_insns to return the insns for the sequence,
12048 not the function, which will cause leaf_function_p to return
12049 the incorrect result.
12051 We no longer need to know about leaf functions once reload has completed, and the
12052 frame size cannot be changed after that time, so we can safely
12053 use the cached value. */
12055 if (reload_completed)
12056 return offsets;
12058 /* Initially this is the size of the local variables. It will be translated
12059 into an offset once we have determined the size of preceding data. */
12060 frame_size = ROUND_UP_WORD (get_frame_size ());
12062 leaf = leaf_function_p ();
12064 /* Space for variadic functions. */
12065 offsets->saved_args = crtl->args.pretend_args_size;
12067 /* In Thumb mode this is incorrect, but never used. */
12068 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
12070 if (TARGET_32BIT)
12072 unsigned int regno;
12074 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12075 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12076 saved = core_saved;
12078 /* We know that SP will be doubleword aligned on entry, and we must
12079 preserve that condition at any subroutine call. We also require the
12080 soft frame pointer to be doubleword aligned. */
12082 if (TARGET_REALLY_IWMMXT)
12084 /* Check for the call-saved iWMMXt registers. */
12085 for (regno = FIRST_IWMMXT_REGNUM;
12086 regno <= LAST_IWMMXT_REGNUM;
12087 regno++)
12088 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12089 saved += 8;
12092 func_type = arm_current_func_type ();
12093 if (! IS_VOLATILE (func_type))
12095 /* Space for saved FPA registers. */
12096 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12097 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12098 saved += 12;
12100 /* Space for saved VFP registers. */
12101 if (TARGET_HARD_FLOAT && TARGET_VFP)
12102 saved += arm_get_vfp_saved_size ();
12105 else /* TARGET_THUMB1 */
12107 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12108 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12109 saved = core_saved;
12110 if (TARGET_BACKTRACE)
12111 saved += 16;
12114 /* Saved registers include the stack frame. */
12115 offsets->saved_regs = offsets->saved_args + saved;
12116 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12117 /* A leaf function does not need any stack alignment if it has nothing
12118 on the stack. */
12119 if (leaf && frame_size == 0)
12121 offsets->outgoing_args = offsets->soft_frame;
12122 offsets->locals_base = offsets->soft_frame;
12123 return offsets;
12126 /* Ensure SFP has the correct alignment. */
12127 if (ARM_DOUBLEWORD_ALIGN
12128 && (offsets->soft_frame & 7))
12130 offsets->soft_frame += 4;
12131 /* Try to align stack by pushing an extra reg. Don't bother doing this
12132 when there is a stack frame as the alignment will be rolled into
12133 the normal stack adjustment. */
12134 if (frame_size + crtl->outgoing_args_size == 0)
12136 int reg = -1;
12138 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12140 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12142 reg = i;
12143 break;
12147 if (reg == -1 && arm_size_return_regs () <= 12
12148 && !crtl->tail_call_emit)
12150 /* Push/pop an argument register (r3) if all callee saved
12151 registers are already being pushed. */
12152 reg = 3;
12155 if (reg != -1)
12157 offsets->saved_regs += 4;
12158 offsets->saved_regs_mask |= (1 << reg);
12163 offsets->locals_base = offsets->soft_frame + frame_size;
12164 offsets->outgoing_args = (offsets->locals_base
12165 + crtl->outgoing_args_size);
12167 if (ARM_DOUBLEWORD_ALIGN)
12169 /* Ensure SP remains doubleword aligned. */
12170 if (offsets->outgoing_args & 7)
12171 offsets->outgoing_args += 4;
12172 gcc_assert (!(offsets->outgoing_args & 7));
12175 return offsets;
12179 /* Calculate the relative offsets for the different stack pointers. Positive
12180 offsets are in the direction of stack growth. */
12182 HOST_WIDE_INT
12183 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12185 arm_stack_offsets *offsets;
12187 offsets = arm_get_frame_offsets ();
12189 /* OK, now we have enough information to compute the distances.
12190 There must be an entry in these switch tables for each pair
12191 of registers in ELIMINABLE_REGS, even if some of the entries
12192 seem to be redundant or useless. */
12193 switch (from)
12195 case ARG_POINTER_REGNUM:
12196 switch (to)
12198 case THUMB_HARD_FRAME_POINTER_REGNUM:
12199 return 0;
12201 case FRAME_POINTER_REGNUM:
12202 /* This is the reverse of the soft frame pointer
12203 to hard frame pointer elimination below. */
12204 return offsets->soft_frame - offsets->saved_args;
12206 case ARM_HARD_FRAME_POINTER_REGNUM:
12207 /* If there is no stack frame then the hard
12208 frame pointer and the arg pointer coincide. */
12209 if (offsets->frame == offsets->saved_regs)
12210 return 0;
12211 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12212 return (frame_pointer_needed
12213 && cfun->static_chain_decl != NULL
12214 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12216 case STACK_POINTER_REGNUM:
12217 /* If nothing has been pushed on the stack at all
12218 then this will return -4. This *is* correct! */
12219 return offsets->outgoing_args - (offsets->saved_args + 4);
12221 default:
12222 gcc_unreachable ();
12224 gcc_unreachable ();
12226 case FRAME_POINTER_REGNUM:
12227 switch (to)
12229 case THUMB_HARD_FRAME_POINTER_REGNUM:
12230 return 0;
12232 case ARM_HARD_FRAME_POINTER_REGNUM:
12233 /* The hard frame pointer points to the top entry in the
12234 stack frame. The soft frame pointer points to the bottom entry
12235 in the stack frame. If there is no stack frame at all,
12236 then they are identical. */
12238 return offsets->frame - offsets->soft_frame;
12240 case STACK_POINTER_REGNUM:
12241 return offsets->outgoing_args - offsets->soft_frame;
12243 default:
12244 gcc_unreachable ();
12246 gcc_unreachable ();
12248 default:
12249 /* You cannot eliminate from the stack pointer.
12250 In theory you could eliminate from the hard frame
12251 pointer to the stack pointer, but this will never
12252 happen, since if a stack frame is not needed the
12253 hard frame pointer will never be used. */
12254 gcc_unreachable ();
12259 /* Emit RTL to save coprocessor registers on function entry. Returns the
12260 number of bytes pushed. */
12262 static int
12263 arm_save_coproc_regs(void)
12265 int saved_size = 0;
12266 unsigned reg;
12267 unsigned start_reg;
12268 rtx insn;
12270 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12271 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12273 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12274 insn = gen_rtx_MEM (V2SImode, insn);
12275 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12276 RTX_FRAME_RELATED_P (insn) = 1;
12277 saved_size += 8;
12280 /* Save any floating point call-saved registers used by this
12281 function. */
12282 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12284 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12285 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12287 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12288 insn = gen_rtx_MEM (XFmode, insn);
12289 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12290 RTX_FRAME_RELATED_P (insn) = 1;
12291 saved_size += 12;
12294 else
12296 start_reg = LAST_FPA_REGNUM;
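/* Scan downwards, batching runs of consecutive live call-saved FPA
   registers into SFM stores of at most four registers each.  */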
12298 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12300 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12302 if (start_reg - reg == 3)
12304 insn = emit_sfm (reg, 4);
12305 RTX_FRAME_RELATED_P (insn) = 1;
12306 saved_size += 48;
12307 start_reg = reg - 1;
12310 else
12312 if (start_reg != reg)
12314 insn = emit_sfm (reg + 1, start_reg - reg);
12315 RTX_FRAME_RELATED_P (insn) = 1;
12316 saved_size += (start_reg - reg) * 12;
12318 start_reg = reg - 1;
12322 if (start_reg != reg)
12324 insn = emit_sfm (reg + 1, start_reg - reg);
12325 saved_size += (start_reg - reg) * 12;
12326 RTX_FRAME_RELATED_P (insn) = 1;
12329 if (TARGET_HARD_FLOAT && TARGET_VFP)
12331 start_reg = FIRST_VFP_REGNUM;
12333 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12335 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12336 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12338 if (start_reg != reg)
12339 saved_size += vfp_emit_fstmd (start_reg,
12340 (reg - start_reg) / 2);
12341 start_reg = reg + 2;
12344 if (start_reg != reg)
12345 saved_size += vfp_emit_fstmd (start_reg,
12346 (reg - start_reg) / 2);
12348 return saved_size;
12352 /* Set the Thumb frame pointer from the stack pointer. */
12354 static void
12355 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12357 HOST_WIDE_INT amount;
12358 rtx insn, dwarf;
12360 amount = offsets->outgoing_args - offsets->locals_base;
12361 if (amount < 1024)
12362 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12363 stack_pointer_rtx, GEN_INT (amount)));
12364 else
12366 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12367 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12368 expects the first two operands to be the same. */
12369 if (TARGET_THUMB2)
12371 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12372 stack_pointer_rtx,
12373 hard_frame_pointer_rtx));
12375 else
12377 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12378 hard_frame_pointer_rtx,
12379 stack_pointer_rtx));
12381 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12382 plus_constant (stack_pointer_rtx, amount));
12383 RTX_FRAME_RELATED_P (dwarf) = 1;
12384 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12385 REG_NOTES (insn));
12388 RTX_FRAME_RELATED_P (insn) = 1;
12391 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12392 function. */
12393 void
12394 arm_expand_prologue (void)
12396 rtx amount;
12397 rtx insn;
12398 rtx ip_rtx;
12399 unsigned long live_regs_mask;
12400 unsigned long func_type;
12401 int fp_offset = 0;
12402 int saved_pretend_args = 0;
12403 int saved_regs = 0;
12404 unsigned HOST_WIDE_INT args_to_push;
12405 arm_stack_offsets *offsets;
12407 func_type = arm_current_func_type ();
12409 /* Naked functions don't have prologues. */
12410 if (IS_NAKED (func_type))
12411 return;
12413 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
12414 args_to_push = crtl->args.pretend_args_size;
12416 /* Compute which registers we will have to save onto the stack. */
12417 offsets = arm_get_frame_offsets ();
12418 live_regs_mask = offsets->saved_regs_mask;
12420 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12422 if (IS_STACKALIGN (func_type))
12424 rtx dwarf;
12425 rtx r0;
12426 rtx r1;
12427 /* Handle a word-aligned stack pointer. We generate the following:
12429 mov r0, sp
12430 bic r1, r0, #7
12431 mov sp, r1
12432 <save and restore r0 in normal prologue/epilogue>
12433 mov sp, r0
12434 bx lr
12436 The unwinder doesn't need to know about the stack realignment.
12437 Just tell it we saved SP in r0. */
12438 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12440 r0 = gen_rtx_REG (SImode, 0);
12441 r1 = gen_rtx_REG (SImode, 1);
12442 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12443 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12444 insn = gen_movsi (r0, stack_pointer_rtx);
12445 RTX_FRAME_RELATED_P (insn) = 1;
12446 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12447 dwarf, REG_NOTES (insn));
12448 emit_insn (insn);
12449 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12450 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12453 /* For APCS frames, if the IP register is clobbered
12454 when creating the frame, save that register in a special
12455 way. */
12456 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12458 if (IS_INTERRUPT (func_type))
12460 /* Interrupt functions must not corrupt any registers.
12461 Creating a frame pointer, however, corrupts the IP
12462 register, so we must push it first. */
12463 insn = emit_multi_reg_push (1 << IP_REGNUM);
12465 /* Do not set RTX_FRAME_RELATED_P on this insn.
12466 The dwarf stack unwinding code only wants to see one
12467 stack decrement per function, and this is not it. If
12468 this instruction is labeled as being part of the frame
12469 creation sequence then dwarf2out_frame_debug_expr will
12470 die when it encounters the assignment of IP to FP
12471 later on, since the use of SP here establishes SP as
12472 the CFA register and not IP.
12474 Anyway this instruction is not really part of the stack
12475 frame creation although it is part of the prologue. */
12477 else if (IS_NESTED (func_type))
12479 /* The Static chain register is the same as the IP register
12480 used as a scratch register during stack frame creation.
12481 To get around this, we need to find somewhere to store IP
12482 whilst the frame is being created. We try the following
12483 places in order:
12485 1. The last argument register.
12486 2. A slot on the stack above the frame. (This only
12487 works if the function is not a varargs function).
12488 3. Register r3, after pushing the argument registers
12489 onto the stack.
12491 Note - we only need to tell the dwarf2 backend about the SP
12492 adjustment in the second variant; the static chain register
12493 doesn't need to be unwound, as it doesn't contain a value
12494 inherited from the caller. */
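/* For instance, the second variant stores IP with a pre-decrement of SP
   (roughly "str ip, [sp, #-4]!"), and only the 4-byte SP adjustment is
   reported to the dwarf backend.  */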
12496 if (df_regs_ever_live_p (3) == false)
12497 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12498 else if (args_to_push == 0)
12500 rtx dwarf;
12502 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12503 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12504 fp_offset = 4;
12506 /* Just tell the dwarf backend that we adjusted SP. */
12507 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12508 plus_constant (stack_pointer_rtx,
12509 -fp_offset));
12510 RTX_FRAME_RELATED_P (insn) = 1;
12511 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12512 dwarf, REG_NOTES (insn));
12514 else
12516 /* Store the args on the stack. */
12517 if (cfun->machine->uses_anonymous_args)
12518 insn = emit_multi_reg_push
12519 ((0xf0 >> (args_to_push / 4)) & 0xf);
12520 else
12521 insn = emit_insn
12522 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12523 GEN_INT (- args_to_push)));
12525 RTX_FRAME_RELATED_P (insn) = 1;
12527 saved_pretend_args = 1;
12528 fp_offset = args_to_push;
12529 args_to_push = 0;
12531 /* Now reuse r3 to preserve IP. */
12532 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12536 insn = emit_set_insn (ip_rtx,
12537 plus_constant (stack_pointer_rtx, fp_offset));
12538 RTX_FRAME_RELATED_P (insn) = 1;
12541 if (args_to_push)
12543 /* Push the argument registers, or reserve space for them. */
12544 if (cfun->machine->uses_anonymous_args)
12545 insn = emit_multi_reg_push
12546 ((0xf0 >> (args_to_push / 4)) & 0xf);
12547 else
12548 insn = emit_insn
12549 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12550 GEN_INT (- args_to_push)));
12551 RTX_FRAME_RELATED_P (insn) = 1;
12554 /* If this is an interrupt service routine, and the link register
12555 is going to be pushed, and we're not generating an extra
12556 push of IP (needed when a frame pointer is needed and the APCS frame layout is used),
12557 subtracting four from LR now will mean that the function return
12558 can be done with a single instruction. */
12559 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12560 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12561 && !(frame_pointer_needed && TARGET_APCS_FRAME)
12562 && TARGET_ARM)
12564 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12566 emit_set_insn (lr, plus_constant (lr, -4));
12569 if (live_regs_mask)
12571 saved_regs += bit_count (live_regs_mask) * 4;
12572 if (optimize_size && !frame_pointer_needed
12573 && saved_regs == offsets->saved_regs - offsets->saved_args)
12575 /* If no coprocessor registers are being pushed and we don't have
12576 to worry about a frame pointer then push extra registers to
12577 create the stack frame. This is done in a way that does not
12578 alter the frame layout, so is independent of the epilogue. */
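/* For example, with an 8-byte local frame and r0/r1 otherwise unused,
   pushing r0 and r1 along with the saved registers creates the frame
   without a separate stack adjustment.  */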
12579 int n;
12580 int frame;
12581 n = 0;
12582 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
12583 n++;
12584 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
12585 if (frame && n * 4 >= frame)
12587 n = frame / 4;
12588 live_regs_mask |= (1 << n) - 1;
12589 saved_regs += frame;
12592 insn = emit_multi_reg_push (live_regs_mask);
12593 RTX_FRAME_RELATED_P (insn) = 1;
12596 if (! IS_VOLATILE (func_type))
12597 saved_regs += arm_save_coproc_regs ();
12599 if (frame_pointer_needed && TARGET_ARM)
12601 /* Create the new frame pointer. */
12602 if (TARGET_APCS_FRAME)
12604 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12605 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12606 RTX_FRAME_RELATED_P (insn) = 1;
12608 if (IS_NESTED (func_type))
12610 /* Recover the static chain register. */
12611 if (!df_regs_ever_live_p (3)
12612 || saved_pretend_args)
12613 insn = gen_rtx_REG (SImode, 3);
12614 else /* if (crtl->args.pretend_args_size == 0) */
12616 insn = plus_constant (hard_frame_pointer_rtx, 4);
12617 insn = gen_frame_mem (SImode, insn);
12619 emit_set_insn (ip_rtx, insn);
12620 /* Add a USE to stop propagate_one_insn() from barfing. */
12621 emit_insn (gen_prologue_use (ip_rtx));
12624 else
12626 insn = GEN_INT (saved_regs - 4);
12627 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12628 stack_pointer_rtx, insn));
12629 RTX_FRAME_RELATED_P (insn) = 1;
12633 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12635 /* This add can produce multiple insns for a large constant, so we
12636 need to get tricky. */
12637 rtx last = get_last_insn ();
12639 amount = GEN_INT (offsets->saved_args + saved_regs
12640 - offsets->outgoing_args);
12642 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12643 amount));
12644 do
12646 last = last ? NEXT_INSN (last) : get_insns ();
12647 RTX_FRAME_RELATED_P (last) = 1;
12649 while (last != insn);
12651 /* If the frame pointer is needed, emit a special barrier that
12652 will prevent the scheduler from moving stores to the frame
12653 before the stack adjustment. */
12654 if (frame_pointer_needed)
12655 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12656 hard_frame_pointer_rtx));
12660 if (frame_pointer_needed && TARGET_THUMB2)
12661 thumb_set_frame_pointer (offsets);
12663 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12665 unsigned long mask;
12667 mask = live_regs_mask;
12668 mask &= THUMB2_WORK_REGS;
12669 if (!IS_NESTED (func_type))
12670 mask |= (1 << IP_REGNUM);
12671 arm_load_pic_register (mask);
12674 /* If we are profiling, make sure no instructions are scheduled before
12675 the call to mcount. Similarly if the user has requested no
12676 scheduling in the prolog. Similarly if we want non-call exceptions
12677 using the EABI unwinder, to prevent faulting instructions from being
12678 swapped with a stack adjustment. */
12679 if (crtl->profile || !TARGET_SCHED_PROLOG
12680 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12681 emit_insn (gen_blockage ());
12683 /* If the link register is being kept alive, with the return address in it,
12684 then make sure that it does not get reused by the ce2 pass. */
12685 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12686 cfun->machine->lr_save_eliminated = 1;
12689 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12690 static void
12691 arm_print_condition (FILE *stream)
12693 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12695 /* Branch conversion is not implemented for Thumb-2. */
12696 if (TARGET_THUMB)
12698 output_operand_lossage ("predicated Thumb instruction");
12699 return;
12701 if (current_insn_predicate != NULL)
12703 output_operand_lossage
12704 ("predicated instruction in conditional sequence");
12705 return;
12708 fputs (arm_condition_codes[arm_current_cc], stream);
12710 else if (current_insn_predicate)
12712 enum arm_cond_code code;
12714 if (TARGET_THUMB1)
12716 output_operand_lossage ("predicated Thumb instruction");
12717 return;
12720 code = get_arm_condition_code (current_insn_predicate);
12721 fputs (arm_condition_codes[code], stream);
12726 /* If CODE is 'd', then X is a condition operand and the instruction
12727 should only be executed if the condition is true.
12728 If CODE is 'D', then X is a condition operand and the instruction
12729 should only be executed if the condition is false: however, if the mode
12730 of the comparison is CCFPEmode, then always execute the instruction -- we
12731 do this because in these circumstances !GE does not necessarily imply LT;
12732 in these cases the instruction pattern will take care to make sure that
12733 an instruction containing %d will follow, thereby undoing the effects of
12734 doing this instruction unconditionally.
12735 If CODE is 'N' then X is a floating point operand that must be negated
12736 before output.
12737 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12738 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
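/* For example, for a DImode value in r0 the 'M' code prints "{r0-r1}".  */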
12739 void
12740 arm_print_operand (FILE *stream, rtx x, int code)
12742 switch (code)
12744 case '@':
12745 fputs (ASM_COMMENT_START, stream);
12746 return;
12748 case '_':
12749 fputs (user_label_prefix, stream);
12750 return;
12752 case '|':
12753 fputs (REGISTER_PREFIX, stream);
12754 return;
12756 case '?':
12757 arm_print_condition (stream);
12758 return;
12760 case '(':
12761 /* Nothing in unified syntax, otherwise the current condition code. */
12762 if (!TARGET_UNIFIED_ASM)
12763 arm_print_condition (stream);
12764 break;
12766 case ')':
12767 /* The current condition code in unified syntax, otherwise nothing. */
12768 if (TARGET_UNIFIED_ASM)
12769 arm_print_condition (stream);
12770 break;
12772 case '.':
12773 /* The current condition code for a condition code setting instruction.
12774 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12775 if (TARGET_UNIFIED_ASM)
12777 fputc('s', stream);
12778 arm_print_condition (stream);
12780 else
12782 arm_print_condition (stream);
12783 fputc('s', stream);
12785 return;
12787 case '!':
12788 /* If the instruction is conditionally executed then print
12789 the current condition code, otherwise print 's'. */
12790 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12791 if (current_insn_predicate)
12792 arm_print_condition (stream);
12793 else
12794 fputc('s', stream);
12795 break;
12797 /* %# is a "break" sequence. It doesn't output anything, but is used to
12798 separate e.g. operand numbers from following text, if that text consists
12799 of further digits which we don't want to be part of the operand
12800 number. */
12801 case '#':
12802 return;
12804 case 'N':
12806 REAL_VALUE_TYPE r;
12807 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12808 r = REAL_VALUE_NEGATE (r);
12809 fprintf (stream, "%s", fp_const_from_val (&r));
12811 return;
12813 /* An integer without a preceding # sign. */
12814 case 'c':
12815 gcc_assert (GET_CODE (x) == CONST_INT);
12816 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12817 return;
12819 case 'B':
12820 if (GET_CODE (x) == CONST_INT)
12822 HOST_WIDE_INT val;
12823 val = ARM_SIGN_EXTEND (~INTVAL (x));
12824 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12826 else
12828 putc ('~', stream);
12829 output_addr_const (stream, x);
12831 return;
12833 case 'L':
12834 /* The low 16 bits of an immediate constant. */
12835 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12836 return;
12838 case 'i':
12839 fprintf (stream, "%s", arithmetic_instr (x, 1));
12840 return;
12842 /* Truncate Cirrus shift counts. */
12843 case 's':
12844 if (GET_CODE (x) == CONST_INT)
12846 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12847 return;
12849 arm_print_operand (stream, x, 0);
12850 return;
12852 case 'I':
12853 fprintf (stream, "%s", arithmetic_instr (x, 0));
12854 return;
12856 case 'S':
12858 HOST_WIDE_INT val;
12859 const char *shift;
12861 if (!shift_operator (x, SImode))
12863 output_operand_lossage ("invalid shift operand");
12864 break;
12867 shift = shift_op (x, &val);
12869 if (shift)
12871 fprintf (stream, ", %s ", shift);
12872 if (val == -1)
12873 arm_print_operand (stream, XEXP (x, 1), 0);
12874 else
12875 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12878 return;
12880 /* An explanation of the 'Q', 'R' and 'H' register operands:
12882 In a pair of registers containing a DI or DF value the 'Q'
12883 operand returns the register number of the register containing
12884 the least significant part of the value. The 'R' operand returns
12885 the register number of the register containing the most
12886 significant part of the value.
12888 The 'H' operand returns the higher of the two register numbers.
12889 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12890 same as the 'Q' operand, since the most significant part of the
12891 value is held in the lower number register. The reverse is true
12892 on systems where WORDS_BIG_ENDIAN is false.
12894 The purpose of these operands is to distinguish between cases
12895 where the endian-ness of the values is important (for example
12896 when they are added together), and cases where the endian-ness
12897 is irrelevant, but the order of register operations is important.
12898 For example when loading a value from memory into a register
12899 pair, the endian-ness does not matter. Provided that the value
12900 from the lower memory address is put into the lower numbered
12901 register, and the value from the higher address is put into the
12902 higher numbered register, the load will work regardless of whether
12903 the value being loaded is big-wordian or little-wordian. The
12904 order of the two register loads can matter however, if the address
12905 of the memory location is actually held in one of the registers
12906 being overwritten by the load. */
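/* For example, with a DImode value held in the pair r2/r3 on a
   little-endian target, 'Q' prints r2 (the least significant half),
   'R' prints r3, and 'H' prints r3, the higher-numbered register.  */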
12907 case 'Q':
12908 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12910 output_operand_lossage ("invalid operand for code '%c'", code);
12911 return;
12914 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12915 return;
12917 case 'R':
12918 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12920 output_operand_lossage ("invalid operand for code '%c'", code);
12921 return;
12924 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12925 return;
12927 case 'H':
12928 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12930 output_operand_lossage ("invalid operand for code '%c'", code);
12931 return;
12934 asm_fprintf (stream, "%r", REGNO (x) + 1);
12935 return;
12937 case 'J':
12938 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12940 output_operand_lossage ("invalid operand for code '%c'", code);
12941 return;
12944 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12945 return;
12947 case 'K':
12948 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12950 output_operand_lossage ("invalid operand for code '%c'", code);
12951 return;
12954 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12955 return;
12957 case 'm':
12958 asm_fprintf (stream, "%r",
12959 GET_CODE (XEXP (x, 0)) == REG
12960 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12961 return;
12963 case 'M':
12964 asm_fprintf (stream, "{%r-%r}",
12965 REGNO (x),
12966 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12967 return;
12969 /* Like 'M', but writing doubleword vector registers, for use by Neon
12970 insns. */
12971 case 'h':
12973 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12974 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12975 if (numregs == 1)
12976 asm_fprintf (stream, "{d%d}", regno);
12977 else
12978 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12980 return;
12982 case 'd':
12983 /* CONST_TRUE_RTX means always -- that's the default. */
12984 if (x == const_true_rtx)
12985 return;
12987 if (!COMPARISON_P (x))
12989 output_operand_lossage ("invalid operand for code '%c'", code);
12990 return;
12993 fputs (arm_condition_codes[get_arm_condition_code (x)],
12994 stream);
12995 return;
12997 case 'D':
12998 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12999 want to do that. */
13000 if (x == const_true_rtx)
13002 output_operand_lossage ("instruction never executed");
13003 return;
13005 if (!COMPARISON_P (x))
13007 output_operand_lossage ("invalid operand for code '%c'", code);
13008 return;
13011 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13012 (get_arm_condition_code (x))],
13013 stream);
13014 return;
13016 /* Cirrus registers can be accessed in a variety of ways:
13017 single floating point (f)
13018 double floating point (d)
13019 32bit integer (fx)
13020 64bit integer (dx). */
13021 case 'W': /* Cirrus register in F mode. */
13022 case 'X': /* Cirrus register in D mode. */
13023 case 'Y': /* Cirrus register in FX mode. */
13024 case 'Z': /* Cirrus register in DX mode. */
13025 gcc_assert (GET_CODE (x) == REG
13026 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13028 fprintf (stream, "mv%s%s",
13029 code == 'W' ? "f"
13030 : code == 'X' ? "d"
13031 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13033 return;
13035 /* Print cirrus register in the mode specified by the register's mode. */
13036 case 'V':
13038 int mode = GET_MODE (x);
13040 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13042 output_operand_lossage ("invalid operand for code '%c'", code);
13043 return;
13046 fprintf (stream, "mv%s%s",
13047 mode == DFmode ? "d"
13048 : mode == SImode ? "fx"
13049 : mode == DImode ? "dx"
13050 : "f", reg_names[REGNO (x)] + 2);
13052 return;
13055 case 'U':
13056 if (GET_CODE (x) != REG
13057 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13058 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13059 /* Bad value for wCG register number. */
13061 output_operand_lossage ("invalid operand for code '%c'", code);
13062 return;
13065 else
13066 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13067 return;
13069 /* Print an iWMMXt control register name. */
13070 case 'w':
13071 if (GET_CODE (x) != CONST_INT
13072 || INTVAL (x) < 0
13073 || INTVAL (x) >= 16)
13074 /* Bad value for wC register number. */
13076 output_operand_lossage ("invalid operand for code '%c'", code);
13077 return;
13080 else
13082 static const char * wc_reg_names [16] =
13084 "wCID", "wCon", "wCSSF", "wCASF",
13085 "wC4", "wC5", "wC6", "wC7",
13086 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13087 "wC12", "wC13", "wC14", "wC15"
13090 fprintf (stream, wc_reg_names [INTVAL (x)]);
13092 return;
13094 /* Print a VFP/Neon double precision or quad precision register name. */
13095 case 'P':
13096 case 'q':
13098 int mode = GET_MODE (x);
13099 int is_quad = (code == 'q');
13100 int regno;
13102 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13104 output_operand_lossage ("invalid operand for code '%c'", code);
13105 return;
13108 if (GET_CODE (x) != REG
13109 || !IS_VFP_REGNUM (REGNO (x)))
13111 output_operand_lossage ("invalid operand for code '%c'", code);
13112 return;
13115 regno = REGNO (x);
13116 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13117 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13119 output_operand_lossage ("invalid operand for code '%c'", code);
13120 return;
13123 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13124 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13126 return;
13128 /* These two codes print the low/high doubleword register of a Neon quad
13129 register, respectively. For pair-structure types, can also print
13130 low/high quadword registers. */
13131 case 'e':
13132 case 'f':
13134 int mode = GET_MODE (x);
13135 int regno;
13137 if ((GET_MODE_SIZE (mode) != 16
13138 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13140 output_operand_lossage ("invalid operand for code '%c'", code);
13141 return;
13144 regno = REGNO (x);
13145 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13147 output_operand_lossage ("invalid operand for code '%c'", code);
13148 return;
13151 if (GET_MODE_SIZE (mode) == 16)
13152 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13153 + (code == 'f' ? 1 : 0));
13154 else
13155 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13156 + (code == 'f' ? 1 : 0));
13158 return;
13160 /* Print a VFPv3 floating-point constant, represented as an integer
13161 index. */
13162 case 'G':
13164 int index = vfp3_const_double_index (x);
13165 gcc_assert (index != -1);
13166 fprintf (stream, "%d", index);
13168 return;
13170 /* Print bits representing opcode features for Neon.
13172 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13173 and polynomials as unsigned.
13175 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13177 Bit 2 is 1 for rounding functions, 0 otherwise. */
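/* For example, an operand value of 3 (signed, float) makes 'T', 'F' and
   't' all print 'f', while 5 (signed, rounding) makes 'T' print 's' and
   'O' print "r".  */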
13179 /* Identify the type as 's', 'u', 'p' or 'f'. */
13180 case 'T':
13182 HOST_WIDE_INT bits = INTVAL (x);
13183 fputc ("uspf"[bits & 3], stream);
13185 return;
13187 /* Likewise, but signed and unsigned integers are both 'i'. */
13188 case 'F':
13190 HOST_WIDE_INT bits = INTVAL (x);
13191 fputc ("iipf"[bits & 3], stream);
13193 return;
13195 /* As for 'T', but emit 'u' instead of 'p'. */
13196 case 't':
13198 HOST_WIDE_INT bits = INTVAL (x);
13199 fputc ("usuf"[bits & 3], stream);
13201 return;
13203 /* Bit 2: rounding (vs none). */
13204 case 'O':
13206 HOST_WIDE_INT bits = INTVAL (x);
13207 fputs ((bits & 4) != 0 ? "r" : "", stream);
13209 return;
13211 default:
13212 if (x == 0)
13214 output_operand_lossage ("missing operand");
13215 return;
13218 switch (GET_CODE (x))
13220 case REG:
13221 asm_fprintf (stream, "%r", REGNO (x));
13222 break;
13224 case MEM:
13225 output_memory_reference_mode = GET_MODE (x);
13226 output_address (XEXP (x, 0));
13227 break;
13229 case CONST_DOUBLE:
13230 if (TARGET_NEON)
13232 char fpstr[20];
13233 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13234 sizeof (fpstr), 0, 1);
13235 fprintf (stream, "#%s", fpstr);
13237 else
13238 fprintf (stream, "#%s", fp_immediate_constant (x));
13239 break;
13241 default:
13242 gcc_assert (GET_CODE (x) != NEG);
13243 fputc ('#', stream);
13244 output_addr_const (stream, x);
13245 break;
13250 /* Target hook for assembling integer objects. The ARM version needs to
13251 handle word-sized values specially. */
13252 static bool
13253 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13255 enum machine_mode mode;
13257 if (size == UNITS_PER_WORD && aligned_p)
13259 fputs ("\t.word\t", asm_out_file);
13260 output_addr_const (asm_out_file, x);
13262 /* Mark symbols as position independent. We only do this in the
13263 .text segment, not in the .data segment. */
13264 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13265 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13267 /* See legitimize_pic_address for an explanation of the
13268 TARGET_VXWORKS_RTP check. */
13269 if (TARGET_VXWORKS_RTP
13270 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13271 fputs ("(GOT)", asm_out_file);
13272 else
13273 fputs ("(GOTOFF)", asm_out_file);
13275 fputc ('\n', asm_out_file);
13276 return true;
13279 mode = GET_MODE (x);
13281 if (arm_vector_mode_supported_p (mode))
13283 int i, units;
13284 unsigned int invmask = 0, parts_per_word;
13286 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13288 units = CONST_VECTOR_NUNITS (x);
13289 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13291 /* For big-endian Neon vectors, we must permute the vector to the form
13292 which, when loaded by a VLDR or VLDM instruction, will give a vector
13293 with the elements in the right order. */
13294 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13296 parts_per_word = UNITS_PER_WORD / size;
13297 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13298 support those anywhere yet. */
13299 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
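/* For example, for HImode elements size is 2, so parts_per_word is 2 and
   invmask is 1, making the i ^ invmask index below swap each adjacent
   pair of elements.  */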
13302 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13303 for (i = 0; i < units; i++)
13305 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13306 assemble_integer
13307 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13309 else
13310 for (i = 0; i < units; i++)
13312 rtx elt = CONST_VECTOR_ELT (x, i);
13313 REAL_VALUE_TYPE rval;
13315 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13317 assemble_real
13318 (rval, GET_MODE_INNER (mode),
13319 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13322 return true;
13325 return default_assemble_integer (x, size, aligned_p);
13328 static void
13329 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13331 section *s;
13333 if (!TARGET_AAPCS_BASED)
13335 (is_ctor ?
13336 default_named_section_asm_out_constructor
13337 : default_named_section_asm_out_destructor) (symbol, priority);
13338 return;
13341 /* Put these in the .init_array section, using a special relocation. */
13342 if (priority != DEFAULT_INIT_PRIORITY)
13344 char buf[18];
13345 sprintf (buf, "%s.%.5u",
13346 is_ctor ? ".init_array" : ".fini_array",
13347 priority);
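/* For example, a constructor with priority 123 is placed in the section
   named ".init_array.00123".  */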
13348 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13350 else if (is_ctor)
13351 s = ctors_section;
13352 else
13353 s = dtors_section;
13355 switch_to_section (s);
13356 assemble_align (POINTER_SIZE);
13357 fputs ("\t.word\t", asm_out_file);
13358 output_addr_const (asm_out_file, symbol);
13359 fputs ("(target1)\n", asm_out_file);
13362 /* Add a function to the list of static constructors. */
13364 static void
13365 arm_elf_asm_constructor (rtx symbol, int priority)
13367 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13370 /* Add a function to the list of static destructors. */
13372 static void
13373 arm_elf_asm_destructor (rtx symbol, int priority)
13375 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13378 /* A finite state machine takes care of noticing whether or not instructions
13379 can be conditionally executed, and thus decreases execution time and code
13380 size by deleting branch instructions. The fsm is controlled by
13381 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13383 /* The states of the fsm controlling condition codes are:
13384 0: normal, do nothing special
13385 1: make ASM_OUTPUT_OPCODE not output this instruction
13386 2: make ASM_OUTPUT_OPCODE not output this instruction
13387 3: make instructions conditional
13388 4: make instructions conditional
13390 State transitions (state->state by whom under condition):
13391 0 -> 1 final_prescan_insn if the `target' is a label
13392 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13393 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13394 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13395 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13396 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13397 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13398 (the target insn is arm_target_insn).
13400 If the jump clobbers the conditions then we use states 2 and 4.
13402 A similar thing can be done with conditional return insns.
13404 XXX In case the `target' is an unconditional branch, this conditionalising
13405 of the instructions always reduces code size, but not always execution
13406 time. But then, I want to reduce the code size to somewhere near what
13407 /bin/cc produces. */
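/* For example, a conditional branch around a single instruction:

        bne     .L1
        mov     r0, #1
     .L1:

   is output as the single conditional instruction "moveq r0, #1", with
   the branch itself not emitted.  */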
13409 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13410 instructions. When a COND_EXEC instruction is seen the subsequent
13411 instructions are scanned so that multiple conditional instructions can be
13412 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13413 specify the length and true/false mask for the IT block. These will be
13414 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
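/* For example, three conditional instructions where the first two are EQ
   and the third is NE leave arm_condexec_masklen == 3 and
   arm_condexec_mask == 3, which thumb2_asm_output_opcode prints as the
   IT instruction "itte eq".  */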
13416 /* Returns the index of the ARM condition code string in
13417 `arm_condition_codes'. COMPARISON should be an rtx like
13418 `(eq (...) (...))'. */
13419 static enum arm_cond_code
13420 get_arm_condition_code (rtx comparison)
13422 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13423 int code;
13424 enum rtx_code comp_code = GET_CODE (comparison);
13426 if (GET_MODE_CLASS (mode) != MODE_CC)
13427 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13428 XEXP (comparison, 1));
13430 switch (mode)
13432 case CC_DNEmode: code = ARM_NE; goto dominance;
13433 case CC_DEQmode: code = ARM_EQ; goto dominance;
13434 case CC_DGEmode: code = ARM_GE; goto dominance;
13435 case CC_DGTmode: code = ARM_GT; goto dominance;
13436 case CC_DLEmode: code = ARM_LE; goto dominance;
13437 case CC_DLTmode: code = ARM_LT; goto dominance;
13438 case CC_DGEUmode: code = ARM_CS; goto dominance;
13439 case CC_DGTUmode: code = ARM_HI; goto dominance;
13440 case CC_DLEUmode: code = ARM_LS; goto dominance;
13441 case CC_DLTUmode: code = ARM_CC;
13443 dominance:
13444 gcc_assert (comp_code == EQ || comp_code == NE);
13446 if (comp_code == EQ)
13447 return ARM_INVERSE_CONDITION_CODE (code);
13448 return code;
13450 case CC_NOOVmode:
13451 switch (comp_code)
13453 case NE: return ARM_NE;
13454 case EQ: return ARM_EQ;
13455 case GE: return ARM_PL;
13456 case LT: return ARM_MI;
13457 default: gcc_unreachable ();
13460 case CC_Zmode:
13461 switch (comp_code)
13463 case NE: return ARM_NE;
13464 case EQ: return ARM_EQ;
13465 default: gcc_unreachable ();
13468 case CC_Nmode:
13469 switch (comp_code)
13471 case NE: return ARM_MI;
13472 case EQ: return ARM_PL;
13473 default: gcc_unreachable ();
13476 case CCFPEmode:
13477 case CCFPmode:
13478 /* These encodings assume that AC=1 in the FPA system control
13479 byte. This allows us to handle all cases except UNEQ and
13480 LTGT. */
13481 switch (comp_code)
13483 case GE: return ARM_GE;
13484 case GT: return ARM_GT;
13485 case LE: return ARM_LS;
13486 case LT: return ARM_MI;
13487 case NE: return ARM_NE;
13488 case EQ: return ARM_EQ;
13489 case ORDERED: return ARM_VC;
13490 case UNORDERED: return ARM_VS;
13491 case UNLT: return ARM_LT;
13492 case UNLE: return ARM_LE;
13493 case UNGT: return ARM_HI;
13494 case UNGE: return ARM_PL;
13495 /* UNEQ and LTGT do not have a representation. */
13496 case UNEQ: /* Fall through. */
13497 case LTGT: /* Fall through. */
13498 default: gcc_unreachable ();
13501 case CC_SWPmode:
13502 switch (comp_code)
13504 case NE: return ARM_NE;
13505 case EQ: return ARM_EQ;
13506 case GE: return ARM_LE;
13507 case GT: return ARM_LT;
13508 case LE: return ARM_GE;
13509 case LT: return ARM_GT;
13510 case GEU: return ARM_LS;
13511 case GTU: return ARM_CC;
13512 case LEU: return ARM_CS;
13513 case LTU: return ARM_HI;
13514 default: gcc_unreachable ();
13517 case CC_Cmode:
13518 switch (comp_code)
13520 case LTU: return ARM_CS;
13521 case GEU: return ARM_CC;
13522 default: gcc_unreachable ();
13525 case CCmode:
13526 switch (comp_code)
13528 case NE: return ARM_NE;
13529 case EQ: return ARM_EQ;
13530 case GE: return ARM_GE;
13531 case GT: return ARM_GT;
13532 case LE: return ARM_LE;
13533 case LT: return ARM_LT;
13534 case GEU: return ARM_CS;
13535 case GTU: return ARM_HI;
13536 case LEU: return ARM_LS;
13537 case LTU: return ARM_CC;
13538 default: gcc_unreachable ();
13541 default: gcc_unreachable ();
13545 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13546 instructions. */
13547 void
13548 thumb2_final_prescan_insn (rtx insn)
13550 rtx first_insn = insn;
13551 rtx body = PATTERN (insn);
13552 rtx predicate;
13553 enum arm_cond_code code;
13554 int n;
13555 int mask;
13557 /* Remove the previous insn from the count of insns to be output. */
13558 if (arm_condexec_count)
13559 arm_condexec_count--;
13561 /* Nothing to do if we are already inside a conditional block. */
13562 if (arm_condexec_count)
13563 return;
13565 if (GET_CODE (body) != COND_EXEC)
13566 return;
13568 /* Conditional jumps are implemented directly. */
13569 if (GET_CODE (insn) == JUMP_INSN)
13570 return;
13572 predicate = COND_EXEC_TEST (body);
13573 arm_current_cc = get_arm_condition_code (predicate);
13575 n = get_attr_ce_count (insn);
13576 arm_condexec_count = 1;
13577 arm_condexec_mask = (1 << n) - 1;
13578 arm_condexec_masklen = n;
13579 /* See if subsequent instructions can be combined into the same block. */
13580 for (;;)
13582 insn = next_nonnote_insn (insn);
13584 /* Jumping into the middle of an IT block is illegal, so a label or
13585 barrier terminates the block. */
13586 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13587 break;
13589 body = PATTERN (insn);
13590 /* USE and CLOBBER aren't really insns, so just skip them. */
13591 if (GET_CODE (body) == USE
13592 || GET_CODE (body) == CLOBBER)
13593 continue;
13595 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13596 if (GET_CODE (body) != COND_EXEC)
13597 break;
13598 /* Allow up to 4 conditionally executed instructions in a block. */
13599 n = get_attr_ce_count (insn);
13600 if (arm_condexec_masklen + n > 4)
13601 break;
13603 predicate = COND_EXEC_TEST (body);
13604 code = get_arm_condition_code (predicate);
13605 mask = (1 << n) - 1;
13606 if (arm_current_cc == code)
13607 arm_condexec_mask |= (mask << arm_condexec_masklen);
13608 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13609 break;
13611 arm_condexec_count++;
13612 arm_condexec_masklen += n;
13614 /* A jump must be the last instruction in a conditional block. */
13615 if (GET_CODE(insn) == JUMP_INSN)
13616 break;
13618 /* Restore recog_data (getting the attributes of other insns can
13619 destroy this array, but final.c assumes that it remains intact
13620 across this call). */
13621 extract_constrain_insn_cached (first_insn);
13624 void
13625 arm_final_prescan_insn (rtx insn)
13627 /* BODY will hold the body of INSN. */
13628 rtx body = PATTERN (insn);
13630 /* This will be 1 if trying to repeat the trick, and things need to be
13631 reversed if it appears to fail. */
13632 int reverse = 0;
13634 /* JUMP_CLOBBERS being one implies that the condition codes are clobbered
13635 if the branch is taken, even if the rtl suggests otherwise. It also
13636 means that we have to grub around within the jump expression to find
13637 out what the conditions are when the jump isn't taken. */
13638 int jump_clobbers = 0;
13640 /* If we start with a return insn, we only succeed if we find another one. */
13641 int seeking_return = 0;
13643 /* START_INSN will hold the insn from where we start looking. This is the
13644 first insn after the following code_label if REVERSE is true. */
13645 rtx start_insn = insn;
13647 /* If in state 4, check if the target branch is reached, in order to
13648 change back to state 0. */
13649 if (arm_ccfsm_state == 4)
13651 if (insn == arm_target_insn)
13653 arm_target_insn = NULL;
13654 arm_ccfsm_state = 0;
13656 return;
13659 /* If in state 3, it is possible to repeat the trick, if this insn is an
13660 unconditional branch to a label, and immediately following this branch
13661 is the previous target label which is only used once, and the label this
13662 branch jumps to is not too far off. */
13663 if (arm_ccfsm_state == 3)
13665 if (simplejump_p (insn))
13667 start_insn = next_nonnote_insn (start_insn);
13668 if (GET_CODE (start_insn) == BARRIER)
13670 /* XXX Isn't this always a barrier? */
13671 start_insn = next_nonnote_insn (start_insn);
13673 if (GET_CODE (start_insn) == CODE_LABEL
13674 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13675 && LABEL_NUSES (start_insn) == 1)
13676 reverse = TRUE;
13677 else
13678 return;
13680 else if (GET_CODE (body) == RETURN)
13682 start_insn = next_nonnote_insn (start_insn);
13683 if (GET_CODE (start_insn) == BARRIER)
13684 start_insn = next_nonnote_insn (start_insn);
13685 if (GET_CODE (start_insn) == CODE_LABEL
13686 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13687 && LABEL_NUSES (start_insn) == 1)
13689 reverse = TRUE;
13690 seeking_return = 1;
13692 else
13693 return;
13695 else
13696 return;
13699 gcc_assert (!arm_ccfsm_state || reverse);
13700 if (GET_CODE (insn) != JUMP_INSN)
13701 return;
13703 /* This jump might be paralleled with a clobber of the condition codes;
13704 the jump should always come first. */
13705 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13706 body = XVECEXP (body, 0, 0);
13708 if (reverse
13709 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13710 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13712 int insns_skipped;
13713 int fail = FALSE, succeed = FALSE;
13714 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13715 int then_not_else = TRUE;
13716 rtx this_insn = start_insn, label = 0;
13718 /* If the jump cannot be done with one instruction, we cannot
13719 conditionally execute the instruction in the inverse case. */
13720 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13722 jump_clobbers = 1;
13723 return;
13726 /* Register the insn jumped to. */
13727 if (reverse)
13729 if (!seeking_return)
13730 label = XEXP (SET_SRC (body), 0);
13732 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13733 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13734 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13736 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13737 then_not_else = FALSE;
13739 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13740 seeking_return = 1;
13741 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13743 seeking_return = 1;
13744 then_not_else = FALSE;
13746 else
13747 gcc_unreachable ();
13749 /* See how many insns this branch skips, and what kind of insns. If all
13750 insns are okay, and the label or unconditional branch to the same
13751 label is not too far away, succeed. */
13752 for (insns_skipped = 0;
13753 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13755 rtx scanbody;
13757 this_insn = next_nonnote_insn (this_insn);
13758 if (!this_insn)
13759 break;
13761 switch (GET_CODE (this_insn))
13763 case CODE_LABEL:
13764 /* Succeed if it is the target label, otherwise fail since
13765 control falls in from somewhere else. */
13766 if (this_insn == label)
13768 if (jump_clobbers)
13770 arm_ccfsm_state = 2;
13771 this_insn = next_nonnote_insn (this_insn);
13773 else
13774 arm_ccfsm_state = 1;
13775 succeed = TRUE;
13777 else
13778 fail = TRUE;
13779 break;
13781 case BARRIER:
13782 /* Succeed if the following insn is the target label.
13783 Otherwise fail.
13784 If return insns are used then the last insn in a function
13785 will be a barrier. */
13786 this_insn = next_nonnote_insn (this_insn);
13787 if (this_insn && this_insn == label)
13789 if (jump_clobbers)
13791 arm_ccfsm_state = 2;
13792 this_insn = next_nonnote_insn (this_insn);
13794 else
13795 arm_ccfsm_state = 1;
13796 succeed = TRUE;
13798 else
13799 fail = TRUE;
13800 break;
13802 case CALL_INSN:
13803 /* The AAPCS says that conditional calls should not be
13804 used since they make interworking inefficient (the
13805 linker can't transform BL<cond> into BLX). That's
13806 only a problem if the machine has BLX. */
13807 if (arm_arch5)
13809 fail = TRUE;
13810 break;
13813 /* Succeed if the following insn is the target label, or
13814 if the following two insns are a barrier and the
13815 target label. */
13816 this_insn = next_nonnote_insn (this_insn);
13817 if (this_insn && GET_CODE (this_insn) == BARRIER)
13818 this_insn = next_nonnote_insn (this_insn);
13820 if (this_insn && this_insn == label
13821 && insns_skipped < max_insns_skipped)
13823 if (jump_clobbers)
13825 arm_ccfsm_state = 2;
13826 this_insn = next_nonnote_insn (this_insn);
13828 else
13829 arm_ccfsm_state = 1;
13830 succeed = TRUE;
13832 else
13833 fail = TRUE;
13834 break;
13836 case JUMP_INSN:
13837 /* If this is an unconditional branch to the same label, succeed.
13838 If it is to another label, do nothing. If it is conditional,
13839 fail. */
13840 /* XXX Probably, the tests for SET and the PC are
13841 unnecessary. */
13843 scanbody = PATTERN (this_insn);
13844 if (GET_CODE (scanbody) == SET
13845 && GET_CODE (SET_DEST (scanbody)) == PC)
13847 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13848 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13850 arm_ccfsm_state = 2;
13851 succeed = TRUE;
13853 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13854 fail = TRUE;
13856 /* Fail if a conditional return is undesirable (e.g. on a
13857 StrongARM), but still allow this if optimizing for size. */
13858 else if (GET_CODE (scanbody) == RETURN
13859 && !use_return_insn (TRUE, NULL)
13860 && !optimize_size)
13861 fail = TRUE;
13862 else if (GET_CODE (scanbody) == RETURN
13863 && seeking_return)
13865 arm_ccfsm_state = 2;
13866 succeed = TRUE;
13868 else if (GET_CODE (scanbody) == PARALLEL)
13870 switch (get_attr_conds (this_insn))
13872 case CONDS_NOCOND:
13873 break;
13874 default:
13875 fail = TRUE;
13876 break;
13879 else
13880 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13882 break;
13884 case INSN:
13885 /* Instructions using or affecting the condition codes make it
13886 fail. */
13887 scanbody = PATTERN (this_insn);
13888 if (!(GET_CODE (scanbody) == SET
13889 || GET_CODE (scanbody) == PARALLEL)
13890 || get_attr_conds (this_insn) != CONDS_NOCOND)
13891 fail = TRUE;
13893 /* A conditional Cirrus instruction must be followed by
13894 a non-Cirrus instruction. However, since we
13895 conditionalize instructions in this function and by
13896 the time we get here we can't add instructions
13897 (nops), because shorten_branches() has already been
13898 called, we will disable conditionalizing Cirrus
13899 instructions to be safe. */
13900 if (GET_CODE (scanbody) != USE
13901 && GET_CODE (scanbody) != CLOBBER
13902 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13903 fail = TRUE;
13904 break;
13906 default:
13907 break;
13910 if (succeed)
13912 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13913 arm_target_label = CODE_LABEL_NUMBER (label);
13914 else
13916 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13918 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13920 this_insn = next_nonnote_insn (this_insn);
13921 gcc_assert (!this_insn
13922 || (GET_CODE (this_insn) != BARRIER
13923 && GET_CODE (this_insn) != CODE_LABEL));
13925 if (!this_insn)
13927 /* Oh dear! We ran off the end; give up. */
13928 extract_constrain_insn_cached (insn);
13929 arm_ccfsm_state = 0;
13930 arm_target_insn = NULL;
13931 return;
13933 arm_target_insn = this_insn;
13935 if (jump_clobbers)
13937 gcc_assert (!reverse);
13938 arm_current_cc =
13939 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13940 0), 0), 1));
13941 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13942 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13943 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13944 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13946 else
13948 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13949 what it was. */
13950 if (!reverse)
13951 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13952 0));
13955 if (reverse || then_not_else)
13956 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13959 /* Restore recog_data (getting the attributes of other insns can
13960 destroy this array, but final.c assumes that it remains intact
13961 across this call). */
13962 extract_constrain_insn_cached (insn);
13966 /* Output IT instructions. */
13967 void
13968 thumb2_asm_output_opcode (FILE * stream)
13970 char buff[5];
13971 int n;
13973 if (arm_condexec_mask)
13975 for (n = 0; n < arm_condexec_masklen; n++)
13976 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13977 buff[n] = 0;
13978 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13979 arm_condition_codes[arm_current_cc]);
13980 arm_condexec_mask = 0;
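/* Worked example (added for illustration; the concrete values below are
   assumptions, not taken from the original source): with
   arm_condexec_masklen == 3, arm_condexec_mask == 0x5 (binary 101) and
   arm_current_cc == ARM_EQ, the loop above builds buff = "tet", so the
   opcode emitted ahead of the first conditional instruction is
   "itet<TAB>eq".  */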
13984 /* Returns true if REGNO is a valid register
13985 for holding a quantity of mode MODE. */
13987 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13989 if (GET_MODE_CLASS (mode) == MODE_CC)
13990 return (regno == CC_REGNUM
13991 || (TARGET_HARD_FLOAT && TARGET_VFP
13992 && regno == VFPCC_REGNUM));
13994 if (TARGET_THUMB1)
13995 /* For the Thumb we only allow values bigger than SImode in
13996 registers 0 - 6, so that there is always a second low
13997 register available to hold the upper part of the value.
13998 We probably ought to ensure that the register is the
13999 start of an even-numbered register pair. */
14000 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
14002 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
14003 && IS_CIRRUS_REGNUM (regno))
14004 /* We have outlawed SI values in Cirrus registers because they
14005 reside in the lower 32 bits, but SF values reside in the
14006 upper 32 bits. This causes gcc all sorts of grief. We can't
14007 even split the registers into pairs because Cirrus SI values
14008 get sign-extended to 64 bits. -- aldyh. */
14009 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
14011 if (TARGET_HARD_FLOAT && TARGET_VFP
14012 && IS_VFP_REGNUM (regno))
14014 if (mode == SFmode || mode == SImode)
14015 return VFP_REGNO_OK_FOR_SINGLE (regno);
14017 if (mode == DFmode)
14018 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14020 if (TARGET_NEON)
14021 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14022 || (VALID_NEON_QREG_MODE (mode)
14023 && NEON_REGNO_OK_FOR_QUAD (regno))
14024 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14025 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14026 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14027 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14028 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14030 return FALSE;
14033 if (TARGET_REALLY_IWMMXT)
14035 if (IS_IWMMXT_GR_REGNUM (regno))
14036 return mode == SImode;
14038 if (IS_IWMMXT_REGNUM (regno))
14039 return VALID_IWMMXT_REG_MODE (mode);
14042 /* We allow any value to be stored in the general registers.
14043 Restrict doubleword quantities to even register pairs so that we can
14044 use ldrd. Do not allow Neon structure opaque modes in general registers;
14045 they would use too many. */
14046 if (regno <= LAST_ARM_REGNUM)
14047 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14048 && !VALID_NEON_STRUCT_MODE (mode);
14050 if (regno == FRAME_POINTER_REGNUM
14051 || regno == ARG_POINTER_REGNUM)
14052 /* We only allow integers in the fake hard registers. */
14053 return GET_MODE_CLASS (mode) == MODE_INT;
14055 /* The only registers left are the FPA registers
14056 which we only allow to hold FP values. */
14057 return (TARGET_HARD_FLOAT && TARGET_FPA
14058 && GET_MODE_CLASS (mode) == MODE_FLOAT
14059 && regno >= FIRST_FPA_REGNUM
14060 && regno <= LAST_FPA_REGNUM);
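/* Illustration (added; assumes an ARM-state target with TARGET_LDRD and the
   usual core-register numbering where r1 == 1 and r2 == 2):

       arm_hard_regno_mode_ok (1, DImode)          -> 0  (odd base register)
       arm_hard_regno_mode_ok (2, DImode)          -> 1  (even pair, ldrd usable)
       arm_hard_regno_mode_ok (CC_REGNUM, CCmode)  -> 1

   DImode is not a Neon structure mode, so only the even/odd restriction
   applies in the core-register case above.  */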
14063 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
14064 not used in ARM mode. */
14066 arm_regno_class (int regno)
14068 if (TARGET_THUMB1)
14070 if (regno == STACK_POINTER_REGNUM)
14071 return STACK_REG;
14072 if (regno == CC_REGNUM)
14073 return CC_REG;
14074 if (regno < 8)
14075 return LO_REGS;
14076 return HI_REGS;
14079 if (TARGET_THUMB2 && regno < 8)
14080 return LO_REGS;
14082 if ( regno <= LAST_ARM_REGNUM
14083 || regno == FRAME_POINTER_REGNUM
14084 || regno == ARG_POINTER_REGNUM)
14085 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14087 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14088 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14090 if (IS_CIRRUS_REGNUM (regno))
14091 return CIRRUS_REGS;
14093 if (IS_VFP_REGNUM (regno))
14095 if (regno <= D7_VFP_REGNUM)
14096 return VFP_D0_D7_REGS;
14097 else if (regno <= LAST_LO_VFP_REGNUM)
14098 return VFP_LO_REGS;
14099 else
14100 return VFP_HI_REGS;
14103 if (IS_IWMMXT_REGNUM (regno))
14104 return IWMMXT_REGS;
14106 if (IS_IWMMXT_GR_REGNUM (regno))
14107 return IWMMXT_GR_REGS;
14109 return FPA_REGS;
14112 /* Handle a special case when computing the offset
14113 of an argument from the frame pointer. */
14115 arm_debugger_arg_offset (int value, rtx addr)
14117 rtx insn;
14119 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
14120 if (value != 0)
14121 return 0;
14123 /* We can only cope with the case where the address is held in a register. */
14124 if (GET_CODE (addr) != REG)
14125 return 0;
14127 /* If we are using the frame pointer to point at the argument, then
14128 an offset of 0 is correct. */
14129 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14130 return 0;
14132 /* If we are using the stack pointer to point at the
14133 argument, then an offset of 0 is correct. */
14134 /* ??? Check this is consistent with thumb2 frame layout. */
14135 if ((TARGET_THUMB || !frame_pointer_needed)
14136 && REGNO (addr) == SP_REGNUM)
14137 return 0;
14139 /* Oh dear. The argument is pointed to by a register rather
14140 than being held in a register, or being stored at a known
14141 offset from the frame pointer. Since GDB only understands
14142 those two kinds of argument we must translate the address
14143 held in the register into an offset from the frame pointer.
14144 We do this by searching through the insns for the function
14145 looking to see where this register gets its value. If the
14146 register is initialized from the frame pointer plus an offset
14147 then we are in luck and we can continue, otherwise we give up.
14149 This code is exercised by producing debugging information
14150 for a function with arguments like this:
14152 double func (double a, double b, int c, double d) {return d;}
14154 Without this code the stab for parameter 'd' will be set to
14155 an offset of 0 from the frame pointer, rather than 8. */
14157 /* The if() statement says:
14159 If the insn is a normal instruction
14160 and if the insn is setting the value in a register
14161 and if the register being set is the register holding the address of the argument
14162 and if the address is computed by an addition
14163 that involves adding to a register
14164 which is the frame pointer
14165 a constant integer
14167 then... */
14169 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14171 if ( GET_CODE (insn) == INSN
14172 && GET_CODE (PATTERN (insn)) == SET
14173 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14174 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14175 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14176 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14177 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14180 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14182 break;
14186 if (value == 0)
14188 debug_rtx (addr);
14189 warning (0, "unable to compute real location of stacked parameter");
14190 value = 8; /* XXX magic hack */
14193 return value;
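/* For reference (added), the insn shape accepted by the loop above is,
   schematically:

       (insn ... (set (reg Rn)
                      (plus (reg HARD_FRAME_POINTER_REGNUM)
                            (const_int OFFSET))) ...)

   where Rn is the register ADDR names; OFFSET is then what gets reported
   to the debugger in place of the bogus zero.  */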
14196 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14197 do \
14199 if ((MASK) & insn_flags) \
14200 add_builtin_function ((NAME), (TYPE), (CODE), \
14201 BUILT_IN_MD, NULL, NULL_TREE); \
14203 while (0)
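/* Illustrative expansion (added for clarity): a use such as

       def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                     ARM_BUILTIN_WZERO);

   calls add_builtin_function () only when FL_IWMMXT is set in insn_flags,
   i.e. only when the selected CPU actually provides the iWMMXt extension;
   otherwise the builtin is silently not registered.  */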
14205 struct builtin_description
14207 const unsigned int mask;
14208 const enum insn_code icode;
14209 const char * const name;
14210 const enum arm_builtins code;
14211 const enum rtx_code comparison;
14212 const unsigned int flag;
14215 static const struct builtin_description bdesc_2arg[] =
14217 #define IWMMXT_BUILTIN(code, string, builtin) \
14218 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14219 ARM_BUILTIN_##builtin, 0, 0 },
14221 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14222 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14223 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14224 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14225 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14226 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14227 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14228 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14229 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14230 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14231 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14232 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14233 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14234 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14235 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14236 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14237 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14238 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14239 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14240 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14241 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14242 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14243 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14244 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14245 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14246 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14247 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14248 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14249 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14250 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14251 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14252 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14253 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14254 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14255 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14256 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14257 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14258 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14259 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14260 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14261 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14262 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14263 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14264 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14265 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14266 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14267 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14268 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14269 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14270 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14271 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14272 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14273 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14274 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14275 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14276 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14277 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14278 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14280 #define IWMMXT_BUILTIN2(code, builtin) \
14281 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14283 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14284 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14285 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14286 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14287 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14288 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14289 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14290 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14291 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14292 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14293 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14294 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14295 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14296 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14297 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14298 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14299 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14300 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14301 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14302 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14303 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14304 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14305 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14306 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14307 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14308 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14309 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14310 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14311 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14312 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14313 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14314 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
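/* Example use of one of the two-operand builtins above (added; the typedef is
   an assumption about how a caller would spell a V4HImode vector, and user
   code would normally reach these builtins through the mmintrin.h wrappers):

       typedef short __ex_v4hi __attribute__ ((vector_size (8)));

       __ex_v4hi add_halfwords (__ex_v4hi a, __ex_v4hi b)
       {
         return __builtin_arm_waddh (a, b);
       }

   The call is routed to the addv4hi3 insn pattern via the WADDH entry.  */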
14317 static const struct builtin_description bdesc_1arg[] =
14319 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14320 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14321 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14322 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14323 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14324 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14325 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14326 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14327 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14328 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14329 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14330 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14331 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14332 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14333 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14334 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14335 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14336 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14339 /* Set up all the iWMMXt builtins. This is
14340 not called if TARGET_IWMMXT is zero. */
14342 static void
14343 arm_init_iwmmxt_builtins (void)
14345 const struct builtin_description * d;
14346 size_t i;
14347 tree endlink = void_list_node;
14349 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14350 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14351 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14353 tree int_ftype_int
14354 = build_function_type (integer_type_node,
14355 tree_cons (NULL_TREE, integer_type_node, endlink));
14356 tree v8qi_ftype_v8qi_v8qi_int
14357 = build_function_type (V8QI_type_node,
14358 tree_cons (NULL_TREE, V8QI_type_node,
14359 tree_cons (NULL_TREE, V8QI_type_node,
14360 tree_cons (NULL_TREE,
14361 integer_type_node,
14362 endlink))));
14363 tree v4hi_ftype_v4hi_int
14364 = build_function_type (V4HI_type_node,
14365 tree_cons (NULL_TREE, V4HI_type_node,
14366 tree_cons (NULL_TREE, integer_type_node,
14367 endlink)));
14368 tree v2si_ftype_v2si_int
14369 = build_function_type (V2SI_type_node,
14370 tree_cons (NULL_TREE, V2SI_type_node,
14371 tree_cons (NULL_TREE, integer_type_node,
14372 endlink)));
14373 tree v2si_ftype_di_di
14374 = build_function_type (V2SI_type_node,
14375 tree_cons (NULL_TREE, long_long_integer_type_node,
14376 tree_cons (NULL_TREE, long_long_integer_type_node,
14377 endlink)));
14378 tree di_ftype_di_int
14379 = build_function_type (long_long_integer_type_node,
14380 tree_cons (NULL_TREE, long_long_integer_type_node,
14381 tree_cons (NULL_TREE, integer_type_node,
14382 endlink)));
14383 tree di_ftype_di_int_int
14384 = build_function_type (long_long_integer_type_node,
14385 tree_cons (NULL_TREE, long_long_integer_type_node,
14386 tree_cons (NULL_TREE, integer_type_node,
14387 tree_cons (NULL_TREE,
14388 integer_type_node,
14389 endlink))));
14390 tree int_ftype_v8qi
14391 = build_function_type (integer_type_node,
14392 tree_cons (NULL_TREE, V8QI_type_node,
14393 endlink));
14394 tree int_ftype_v4hi
14395 = build_function_type (integer_type_node,
14396 tree_cons (NULL_TREE, V4HI_type_node,
14397 endlink));
14398 tree int_ftype_v2si
14399 = build_function_type (integer_type_node,
14400 tree_cons (NULL_TREE, V2SI_type_node,
14401 endlink));
14402 tree int_ftype_v8qi_int
14403 = build_function_type (integer_type_node,
14404 tree_cons (NULL_TREE, V8QI_type_node,
14405 tree_cons (NULL_TREE, integer_type_node,
14406 endlink)));
14407 tree int_ftype_v4hi_int
14408 = build_function_type (integer_type_node,
14409 tree_cons (NULL_TREE, V4HI_type_node,
14410 tree_cons (NULL_TREE, integer_type_node,
14411 endlink)));
14412 tree int_ftype_v2si_int
14413 = build_function_type (integer_type_node,
14414 tree_cons (NULL_TREE, V2SI_type_node,
14415 tree_cons (NULL_TREE, integer_type_node,
14416 endlink)));
14417 tree v8qi_ftype_v8qi_int_int
14418 = build_function_type (V8QI_type_node,
14419 tree_cons (NULL_TREE, V8QI_type_node,
14420 tree_cons (NULL_TREE, integer_type_node,
14421 tree_cons (NULL_TREE,
14422 integer_type_node,
14423 endlink))));
14424 tree v4hi_ftype_v4hi_int_int
14425 = build_function_type (V4HI_type_node,
14426 tree_cons (NULL_TREE, V4HI_type_node,
14427 tree_cons (NULL_TREE, integer_type_node,
14428 tree_cons (NULL_TREE,
14429 integer_type_node,
14430 endlink))));
14431 tree v2si_ftype_v2si_int_int
14432 = build_function_type (V2SI_type_node,
14433 tree_cons (NULL_TREE, V2SI_type_node,
14434 tree_cons (NULL_TREE, integer_type_node,
14435 tree_cons (NULL_TREE,
14436 integer_type_node,
14437 endlink))));
14438 /* Miscellaneous. */
14439 tree v8qi_ftype_v4hi_v4hi
14440 = build_function_type (V8QI_type_node,
14441 tree_cons (NULL_TREE, V4HI_type_node,
14442 tree_cons (NULL_TREE, V4HI_type_node,
14443 endlink)));
14444 tree v4hi_ftype_v2si_v2si
14445 = build_function_type (V4HI_type_node,
14446 tree_cons (NULL_TREE, V2SI_type_node,
14447 tree_cons (NULL_TREE, V2SI_type_node,
14448 endlink)));
14449 tree v2si_ftype_v4hi_v4hi
14450 = build_function_type (V2SI_type_node,
14451 tree_cons (NULL_TREE, V4HI_type_node,
14452 tree_cons (NULL_TREE, V4HI_type_node,
14453 endlink)));
14454 tree v2si_ftype_v8qi_v8qi
14455 = build_function_type (V2SI_type_node,
14456 tree_cons (NULL_TREE, V8QI_type_node,
14457 tree_cons (NULL_TREE, V8QI_type_node,
14458 endlink)));
14459 tree v4hi_ftype_v4hi_di
14460 = build_function_type (V4HI_type_node,
14461 tree_cons (NULL_TREE, V4HI_type_node,
14462 tree_cons (NULL_TREE,
14463 long_long_integer_type_node,
14464 endlink)));
14465 tree v2si_ftype_v2si_di
14466 = build_function_type (V2SI_type_node,
14467 tree_cons (NULL_TREE, V2SI_type_node,
14468 tree_cons (NULL_TREE,
14469 long_long_integer_type_node,
14470 endlink)));
14471 tree void_ftype_int_int
14472 = build_function_type (void_type_node,
14473 tree_cons (NULL_TREE, integer_type_node,
14474 tree_cons (NULL_TREE, integer_type_node,
14475 endlink)));
14476 tree di_ftype_void
14477 = build_function_type (long_long_unsigned_type_node, endlink);
14478 tree di_ftype_v8qi
14479 = build_function_type (long_long_integer_type_node,
14480 tree_cons (NULL_TREE, V8QI_type_node,
14481 endlink));
14482 tree di_ftype_v4hi
14483 = build_function_type (long_long_integer_type_node,
14484 tree_cons (NULL_TREE, V4HI_type_node,
14485 endlink));
14486 tree di_ftype_v2si
14487 = build_function_type (long_long_integer_type_node,
14488 tree_cons (NULL_TREE, V2SI_type_node,
14489 endlink));
14490 tree v2si_ftype_v4hi
14491 = build_function_type (V2SI_type_node,
14492 tree_cons (NULL_TREE, V4HI_type_node,
14493 endlink));
14494 tree v4hi_ftype_v8qi
14495 = build_function_type (V4HI_type_node,
14496 tree_cons (NULL_TREE, V8QI_type_node,
14497 endlink));
14499 tree di_ftype_di_v4hi_v4hi
14500 = build_function_type (long_long_unsigned_type_node,
14501 tree_cons (NULL_TREE,
14502 long_long_unsigned_type_node,
14503 tree_cons (NULL_TREE, V4HI_type_node,
14504 tree_cons (NULL_TREE,
14505 V4HI_type_node,
14506 endlink))));
14508 tree di_ftype_v4hi_v4hi
14509 = build_function_type (long_long_unsigned_type_node,
14510 tree_cons (NULL_TREE, V4HI_type_node,
14511 tree_cons (NULL_TREE, V4HI_type_node,
14512 endlink)));
14514 /* Normal vector binops. */
14515 tree v8qi_ftype_v8qi_v8qi
14516 = build_function_type (V8QI_type_node,
14517 tree_cons (NULL_TREE, V8QI_type_node,
14518 tree_cons (NULL_TREE, V8QI_type_node,
14519 endlink)));
14520 tree v4hi_ftype_v4hi_v4hi
14521 = build_function_type (V4HI_type_node,
14522 tree_cons (NULL_TREE, V4HI_type_node,
14523 tree_cons (NULL_TREE, V4HI_type_node,
14524 endlink)));
14525 tree v2si_ftype_v2si_v2si
14526 = build_function_type (V2SI_type_node,
14527 tree_cons (NULL_TREE, V2SI_type_node,
14528 tree_cons (NULL_TREE, V2SI_type_node,
14529 endlink)));
14530 tree di_ftype_di_di
14531 = build_function_type (long_long_unsigned_type_node,
14532 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14533 tree_cons (NULL_TREE,
14534 long_long_unsigned_type_node,
14535 endlink)));
14537 /* Add all builtins that are more or less simple operations on two
14538 operands. */
14539 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14541 /* Use one of the operands; the target can have a different mode for
14542 mask-generating compares. */
14543 enum machine_mode mode;
14544 tree type;
14546 if (d->name == 0)
14547 continue;
14549 mode = insn_data[d->icode].operand[1].mode;
14551 switch (mode)
14553 case V8QImode:
14554 type = v8qi_ftype_v8qi_v8qi;
14555 break;
14556 case V4HImode:
14557 type = v4hi_ftype_v4hi_v4hi;
14558 break;
14559 case V2SImode:
14560 type = v2si_ftype_v2si_v2si;
14561 break;
14562 case DImode:
14563 type = di_ftype_di_di;
14564 break;
14566 default:
14567 gcc_unreachable ();
14570 def_mbuiltin (d->mask, d->name, type, d->code);
14573 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
14574 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14575 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14578 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14579 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14581 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14585 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14586 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14588 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14589 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14592 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14593 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14594 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14595 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14596 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14597 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14599 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14600 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14601 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14602 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14603 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14604 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14606 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14608 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14609 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14610 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14611 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14613 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14614 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14615 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14616 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14617 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14618 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14619 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14620 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14621 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14623 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14624 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14625 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14627 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14628 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14629 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14631 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14632 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14633 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14634 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14635 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14636 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14638 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14639 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14640 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14641 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14642 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14643 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14644 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14645 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14646 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14647 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14648 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14649 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14651 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14652 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14653 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14654 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14656 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14657 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14658 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14659 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14660 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14661 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14662 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14665 static void
14666 arm_init_tls_builtins (void)
14668 tree ftype, decl;
14670 ftype = build_function_type (ptr_type_node, void_list_node);
14671 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14672 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14673 NULL, NULL_TREE);
14674 TREE_NOTHROW (decl) = 1;
14675 TREE_READONLY (decl) = 1;
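/* Usage note (added): the declaration above is what makes the GCC builtin
   __builtin_thread_pointer available, e.g.

       void *tp = __builtin_thread_pointer ();

   which, depending on -mtp, expands to either a CP15 read of the thread
   register or a call to the __aeabi_read_tp helper.  */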
14678 typedef enum {
14679 T_V8QI = 0x0001,
14680 T_V4HI = 0x0002,
14681 T_V2SI = 0x0004,
14682 T_V2SF = 0x0008,
14683 T_DI = 0x0010,
14684 T_V16QI = 0x0020,
14685 T_V8HI = 0x0040,
14686 T_V4SI = 0x0080,
14687 T_V4SF = 0x0100,
14688 T_V2DI = 0x0200,
14689 T_TI = 0x0400,
14690 T_EI = 0x0800,
14691 T_OI = 0x1000
14692 } neon_builtin_type_bits;
14694 #define v8qi_UP T_V8QI
14695 #define v4hi_UP T_V4HI
14696 #define v2si_UP T_V2SI
14697 #define v2sf_UP T_V2SF
14698 #define di_UP T_DI
14699 #define v16qi_UP T_V16QI
14700 #define v8hi_UP T_V8HI
14701 #define v4si_UP T_V4SI
14702 #define v4sf_UP T_V4SF
14703 #define v2di_UP T_V2DI
14704 #define ti_UP T_TI
14705 #define ei_UP T_EI
14706 #define oi_UP T_OI
14708 #define UP(X) X##_UP
14710 #define T_MAX 13
14712 typedef enum {
14713 NEON_BINOP,
14714 NEON_TERNOP,
14715 NEON_UNOP,
14716 NEON_GETLANE,
14717 NEON_SETLANE,
14718 NEON_CREATE,
14719 NEON_DUP,
14720 NEON_DUPLANE,
14721 NEON_COMBINE,
14722 NEON_SPLIT,
14723 NEON_LANEMUL,
14724 NEON_LANEMULL,
14725 NEON_LANEMULH,
14726 NEON_LANEMAC,
14727 NEON_SCALARMUL,
14728 NEON_SCALARMULL,
14729 NEON_SCALARMULH,
14730 NEON_SCALARMAC,
14731 NEON_CONVERT,
14732 NEON_FIXCONV,
14733 NEON_SELECT,
14734 NEON_RESULTPAIR,
14735 NEON_REINTERP,
14736 NEON_VTBL,
14737 NEON_VTBX,
14738 NEON_LOAD1,
14739 NEON_LOAD1LANE,
14740 NEON_STORE1,
14741 NEON_STORE1LANE,
14742 NEON_LOADSTRUCT,
14743 NEON_LOADSTRUCTLANE,
14744 NEON_STORESTRUCT,
14745 NEON_STORESTRUCTLANE,
14746 NEON_LOGICBINOP,
14747 NEON_SHIFTINSERT,
14748 NEON_SHIFTIMM,
14749 NEON_SHIFTACC
14750 } neon_itype;
14752 typedef struct {
14753 const char *name;
14754 const neon_itype itype;
14755 const neon_builtin_type_bits bits;
14756 const enum insn_code codes[T_MAX];
14757 const unsigned int num_vars;
14758 unsigned int base_fcode;
14759 } neon_builtin_datum;
14761 #define CF(N,X) CODE_FOR_neon_##N##X
14763 #define VAR1(T, N, A) \
14764 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14765 #define VAR2(T, N, A, B) \
14766 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14767 #define VAR3(T, N, A, B, C) \
14768 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14769 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14770 #define VAR4(T, N, A, B, C, D) \
14771 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14772 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14773 #define VAR5(T, N, A, B, C, D, E) \
14774 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14775 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14776 #define VAR6(T, N, A, B, C, D, E, F) \
14777 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14778 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14779 #define VAR7(T, N, A, B, C, D, E, F, G) \
14780 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14781 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14782 CF (N, G) }, 7, 0
14783 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14784 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14785 | UP (H), \
14786 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14787 CF (N, G), CF (N, H) }, 8, 0
14788 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14789 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14790 | UP (H) | UP (I), \
14791 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14792 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14793 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14794 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14795 | UP (H) | UP (I) | UP (J), \
14796 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14797 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
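/* Illustrative expansion (added): a table entry written with these macros,
   for example

       { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },

   stands for

       { "vaddl", NEON_BINOP, T_V8QI | T_V4HI | T_V2SI,
         { CODE_FOR_neon_vaddlv8qi, CODE_FOR_neon_vaddlv4hi,
           CODE_FOR_neon_vaddlv2si }, 3, 0 },

   i.e. a single neon_builtin_datum covering the three element sizes.  */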
14799 /* The mode entries in the following table correspond to the "key" type of the
14800 instruction variant, i.e. equivalent to that which would be specified after
14801 the assembler mnemonic, which usually refers to the last vector operand.
14802 (Signed, unsigned and polynomial types are not differentiated, though; they
14803 are all mapped onto the same mode for a given element size.) The modes
14804 listed per instruction should be the same as those defined for that
14805 instruction's pattern in neon.md.
14806 WARNING: Variants should be listed in the same increasing order as
14807 neon_builtin_type_bits. */
14809 static neon_builtin_datum neon_builtin_data[] =
14811 { VAR10 (BINOP, vadd,
14812 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14813 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14814 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14815 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14816 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14817 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14818 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14819 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14820 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14821 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14822 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14823 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14824 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14825 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14826 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14827 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14828 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14829 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14830 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14831 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14832 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14833 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14834 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14835 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14836 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14837 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14838 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14839 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14840 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14841 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14842 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14843 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14844 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14845 { VAR10 (BINOP, vsub,
14846 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14847 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14848 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14849 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14850 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14851 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14852 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14853 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14854 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14855 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14856 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14857 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14858 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14859 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14860 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14861 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14862 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14863 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14864 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14865 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14866 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14867 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14868 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14869 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14870 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14871 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14872 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14873 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14874 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14875 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14876 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14877 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14878 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14879 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14880 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14881 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14882 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14883 /* FIXME: vget_lane supports more variants than this! */
14884 { VAR10 (GETLANE, vget_lane,
14885 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14886 { VAR10 (SETLANE, vset_lane,
14887 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14888 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14889 { VAR10 (DUP, vdup_n,
14890 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14891 { VAR10 (DUPLANE, vdup_lane,
14892 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14893 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14894 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14895 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14896 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14897 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14898 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14899 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14900 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14901 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14902 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14903 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14904 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14905 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14906 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14907 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14908 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14909 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14910 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14911 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14912 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14913 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14914 { VAR10 (BINOP, vext,
14915 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14916 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14917 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14918 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14919 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14920 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14921 { VAR10 (SELECT, vbsl,
14922 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14923 { VAR1 (VTBL, vtbl1, v8qi) },
14924 { VAR1 (VTBL, vtbl2, v8qi) },
14925 { VAR1 (VTBL, vtbl3, v8qi) },
14926 { VAR1 (VTBL, vtbl4, v8qi) },
14927 { VAR1 (VTBX, vtbx1, v8qi) },
14928 { VAR1 (VTBX, vtbx2, v8qi) },
14929 { VAR1 (VTBX, vtbx3, v8qi) },
14930 { VAR1 (VTBX, vtbx4, v8qi) },
14931 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14932 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14933 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14934 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14935 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14936 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14937 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14938 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14939 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14940 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14941 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14942 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14943 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14944 { VAR10 (LOAD1, vld1,
14945 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14946 { VAR10 (LOAD1LANE, vld1_lane,
14947 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14948 { VAR10 (LOAD1, vld1_dup,
14949 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14950 { VAR10 (STORE1, vst1,
14951 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14952 { VAR10 (STORE1LANE, vst1_lane,
14953 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14954 { VAR9 (LOADSTRUCT,
14955 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14956 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14957 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14958 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14959 { VAR9 (STORESTRUCT, vst2,
14960 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14961 { VAR7 (STORESTRUCTLANE, vst2_lane,
14962 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14963 { VAR9 (LOADSTRUCT,
14964 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14965 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14966 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14967 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14968 { VAR9 (STORESTRUCT, vst3,
14969 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14970 { VAR7 (STORESTRUCTLANE, vst3_lane,
14971 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14972 { VAR9 (LOADSTRUCT, vld4,
14973 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14974 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14975 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14976 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14977 { VAR9 (STORESTRUCT, vst4,
14978 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14979 { VAR7 (STORESTRUCTLANE, vst4_lane,
14980 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14981 { VAR10 (LOGICBINOP, vand,
14982 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14983 { VAR10 (LOGICBINOP, vorr,
14984 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14985 { VAR10 (BINOP, veor,
14986 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14987 { VAR10 (LOGICBINOP, vbic,
14988 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14989 { VAR10 (LOGICBINOP, vorn,
14990 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14993 #undef CF
14994 #undef VAR1
14995 #undef VAR2
14996 #undef VAR3
14997 #undef VAR4
14998 #undef VAR5
14999 #undef VAR6
15000 #undef VAR7
15001 #undef VAR8
15002 #undef VAR9
15003 #undef VAR10
15005 static void
15006 arm_init_neon_builtins (void)
15008 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
15010 /* Create distinguished type nodes for NEON vector element types,
15011 and pointers to values of such types, so we can detect them later. */
15012 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15013 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15014 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15015 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15016 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15017 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15018 tree neon_float_type_node = make_node (REAL_TYPE);
15020 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15021 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15022 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15023 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15024 tree float_pointer_node = build_pointer_type (neon_float_type_node);
15026 /* Next create constant-qualified versions of the above types. */
15027 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
15028 TYPE_QUAL_CONST);
15029 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
15030 TYPE_QUAL_CONST);
15031 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
15032 TYPE_QUAL_CONST);
15033 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
15034 TYPE_QUAL_CONST);
15035 tree const_float_node = build_qualified_type (neon_float_type_node,
15036 TYPE_QUAL_CONST);
15038 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15039 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15040 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15041 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15042 tree const_float_pointer_node = build_pointer_type (const_float_node);
15044 /* Now create vector types based on our NEON element types. */
15045 /* 64-bit vectors. */
15046 tree V8QI_type_node =
15047 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15048 tree V4HI_type_node =
15049 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15050 tree V2SI_type_node =
15051 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15052 tree V2SF_type_node =
15053 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15054 /* 128-bit vectors. */
15055 tree V16QI_type_node =
15056 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15057 tree V8HI_type_node =
15058 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15059 tree V4SI_type_node =
15060 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15061 tree V4SF_type_node =
15062 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15063 tree V2DI_type_node =
15064 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15066 /* Unsigned integer types for various mode sizes. */
15067 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15068 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15069 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15070 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15072 /* Opaque integer types for structures of vectors. */
15073 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15074 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15075 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15076 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15078 /* Pointers to vector types. */
15079 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15080 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15081 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15082 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15083 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15084 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15085 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15086 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15087 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15089 /* Operations which return results as pairs. */
15090 tree void_ftype_pv8qi_v8qi_v8qi =
15091 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15092 V8QI_type_node, NULL);
15093 tree void_ftype_pv4hi_v4hi_v4hi =
15094 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15095 V4HI_type_node, NULL);
15096 tree void_ftype_pv2si_v2si_v2si =
15097 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15098 V2SI_type_node, NULL);
15099 tree void_ftype_pv2sf_v2sf_v2sf =
15100 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15101 V2SF_type_node, NULL);
15102 tree void_ftype_pdi_di_di =
15103 build_function_type_list (void_type_node, intDI_pointer_node,
15104 neon_intDI_type_node, neon_intDI_type_node, NULL);
15105 tree void_ftype_pv16qi_v16qi_v16qi =
15106 build_function_type_list (void_type_node, V16QI_pointer_node,
15107 V16QI_type_node, V16QI_type_node, NULL);
15108 tree void_ftype_pv8hi_v8hi_v8hi =
15109 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15110 V8HI_type_node, NULL);
15111 tree void_ftype_pv4si_v4si_v4si =
15112 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15113 V4SI_type_node, NULL);
15114 tree void_ftype_pv4sf_v4sf_v4sf =
15115 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15116 V4SF_type_node, NULL);
15117 tree void_ftype_pv2di_v2di_v2di =
15118 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15119 V2DI_type_node, NULL);
15121 tree reinterp_ftype_dreg[5][5];
15122 tree reinterp_ftype_qreg[5][5];
15123 tree dreg_types[5], qreg_types[5];
15125 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15126 layout_type (neon_float_type_node);
15128 /* Define typedefs which exactly correspond to the modes we are basing vector
15129 types on. If you change these names you'll need to change
15130 the table used by arm_mangle_type too. */
15131 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15132 "__builtin_neon_qi");
15133 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15134 "__builtin_neon_hi");
15135 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15136 "__builtin_neon_si");
15137 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15138 "__builtin_neon_sf");
15139 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15140 "__builtin_neon_di");
15142 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15143 "__builtin_neon_poly8");
15144 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15145 "__builtin_neon_poly16");
15146 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15147 "__builtin_neon_uqi");
15148 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15149 "__builtin_neon_uhi");
15150 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15151 "__builtin_neon_usi");
15152 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15153 "__builtin_neon_udi");
15155 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15156 "__builtin_neon_ti");
15157 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15158 "__builtin_neon_ei");
15159 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15160 "__builtin_neon_oi");
15161 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15162 "__builtin_neon_ci");
15163 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15164 "__builtin_neon_xi");
15166 dreg_types[0] = V8QI_type_node;
15167 dreg_types[1] = V4HI_type_node;
15168 dreg_types[2] = V2SI_type_node;
15169 dreg_types[3] = V2SF_type_node;
15170 dreg_types[4] = neon_intDI_type_node;
15172 qreg_types[0] = V16QI_type_node;
15173 qreg_types[1] = V8HI_type_node;
15174 qreg_types[2] = V4SI_type_node;
15175 qreg_types[3] = V4SF_type_node;
15176 qreg_types[4] = V2DI_type_node;
15178 for (i = 0; i < 5; i++)
15180 int j;
15181 for (j = 0; j < 5; j++)
15183 reinterp_ftype_dreg[i][j]
15184 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15185 reinterp_ftype_qreg[i][j]
15186 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
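/* For example (added), reinterp_ftype_dreg[1][0] built here is the type of a
   function returning a V4HI vector and taking a V8QI vector; it is the
   signature later used for the vreinterpretv4hi variant whose argument mode
   is V8QImode.  The qreg array holds the analogous 128-bit signatures.  */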
15190 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15192 neon_builtin_datum *d = &neon_builtin_data[i];
15193 unsigned int j, codeidx = 0;
15195 d->base_fcode = fcode;
15197 for (j = 0; j < T_MAX; j++)
15199 const char* const modenames[] = {
15200 "v8qi", "v4hi", "v2si", "v2sf", "di",
15201 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15203 char namebuf[60];
15204 tree ftype = NULL;
15205 enum insn_code icode;
15206 int is_load = 0, is_store = 0;
15208 if ((d->bits & (1 << j)) == 0)
15209 continue;
15211 icode = d->codes[codeidx++];
15213 switch (d->itype)
15215 case NEON_LOAD1:
15216 case NEON_LOAD1LANE:
15217 case NEON_LOADSTRUCT:
15218 case NEON_LOADSTRUCTLANE:
15219 is_load = 1;
15220 /* Fall through. */
15221 case NEON_STORE1:
15222 case NEON_STORE1LANE:
15223 case NEON_STORESTRUCT:
15224 case NEON_STORESTRUCTLANE:
15225 if (!is_load)
15226 is_store = 1;
15227 /* Fall through. */
15228 case NEON_UNOP:
15229 case NEON_BINOP:
15230 case NEON_LOGICBINOP:
15231 case NEON_SHIFTINSERT:
15232 case NEON_TERNOP:
15233 case NEON_GETLANE:
15234 case NEON_SETLANE:
15235 case NEON_CREATE:
15236 case NEON_DUP:
15237 case NEON_DUPLANE:
15238 case NEON_SHIFTIMM:
15239 case NEON_SHIFTACC:
15240 case NEON_COMBINE:
15241 case NEON_SPLIT:
15242 case NEON_CONVERT:
15243 case NEON_FIXCONV:
15244 case NEON_LANEMUL:
15245 case NEON_LANEMULL:
15246 case NEON_LANEMULH:
15247 case NEON_LANEMAC:
15248 case NEON_SCALARMUL:
15249 case NEON_SCALARMULL:
15250 case NEON_SCALARMULH:
15251 case NEON_SCALARMAC:
15252 case NEON_SELECT:
15253 case NEON_VTBL:
15254 case NEON_VTBX:
15256 int k;
15257 tree return_type = void_type_node, args = void_list_node;
15259 /* Build a function type directly from the insn_data for this
15260 builtin. The build_function_type() function takes care of
15261 removing duplicates for us. */
15262 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15264 tree eltype;
15266 if (is_load && k == 1)
15268 /* Neon load patterns always have the memory operand
15269 (a SImode pointer) in the operand 1 position. We
15270 want a const pointer to the element type in that
15271 position. */
15272 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15274 switch (1 << j)
15276 case T_V8QI:
15277 case T_V16QI:
15278 eltype = const_intQI_pointer_node;
15279 break;
15281 case T_V4HI:
15282 case T_V8HI:
15283 eltype = const_intHI_pointer_node;
15284 break;
15286 case T_V2SI:
15287 case T_V4SI:
15288 eltype = const_intSI_pointer_node;
15289 break;
15291 case T_V2SF:
15292 case T_V4SF:
15293 eltype = const_float_pointer_node;
15294 break;
15296 case T_DI:
15297 case T_V2DI:
15298 eltype = const_intDI_pointer_node;
15299 break;
15301 default: gcc_unreachable ();
15304 else if (is_store && k == 0)
15306 /* Similarly, Neon store patterns use operand 0 as
15307 the memory location to store to (a SImode pointer).
15308 Use a pointer to the element type of the store in
15309 that position. */
15310 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15312 switch (1 << j)
15314 case T_V8QI:
15315 case T_V16QI:
15316 eltype = intQI_pointer_node;
15317 break;
15319 case T_V4HI:
15320 case T_V8HI:
15321 eltype = intHI_pointer_node;
15322 break;
15324 case T_V2SI:
15325 case T_V4SI:
15326 eltype = intSI_pointer_node;
15327 break;
15329 case T_V2SF:
15330 case T_V4SF:
15331 eltype = float_pointer_node;
15332 break;
15334 case T_DI:
15335 case T_V2DI:
15336 eltype = intDI_pointer_node;
15337 break;
15339 default: gcc_unreachable ();
15342 else
15344 switch (insn_data[icode].operand[k].mode)
15346 case VOIDmode: eltype = void_type_node; break;
15347 /* Scalars. */
15348 case QImode: eltype = neon_intQI_type_node; break;
15349 case HImode: eltype = neon_intHI_type_node; break;
15350 case SImode: eltype = neon_intSI_type_node; break;
15351 case SFmode: eltype = neon_float_type_node; break;
15352 case DImode: eltype = neon_intDI_type_node; break;
15353 case TImode: eltype = intTI_type_node; break;
15354 case EImode: eltype = intEI_type_node; break;
15355 case OImode: eltype = intOI_type_node; break;
15356 case CImode: eltype = intCI_type_node; break;
15357 case XImode: eltype = intXI_type_node; break;
15358 /* 64-bit vectors. */
15359 case V8QImode: eltype = V8QI_type_node; break;
15360 case V4HImode: eltype = V4HI_type_node; break;
15361 case V2SImode: eltype = V2SI_type_node; break;
15362 case V2SFmode: eltype = V2SF_type_node; break;
15363 /* 128-bit vectors. */
15364 case V16QImode: eltype = V16QI_type_node; break;
15365 case V8HImode: eltype = V8HI_type_node; break;
15366 case V4SImode: eltype = V4SI_type_node; break;
15367 case V4SFmode: eltype = V4SF_type_node; break;
15368 case V2DImode: eltype = V2DI_type_node; break;
15369 default: gcc_unreachable ();
15373 if (k == 0 && !is_store)
15374 return_type = eltype;
15375 else
15376 args = tree_cons (NULL_TREE, eltype, args);
15379 ftype = build_function_type (return_type, args);
15381 break;
15383 case NEON_RESULTPAIR:
15385 switch (insn_data[icode].operand[1].mode)
15387 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15388 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15389 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15390 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15391 case DImode: ftype = void_ftype_pdi_di_di; break;
15392 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15393 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15394 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15395 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15396 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15397 default: gcc_unreachable ();
15400 break;
15402 case NEON_REINTERP:
15404 /* We iterate over 5 doubleword types, then 5 quadword
15405 types. */
15406 int rhs = j % 5;
15407 switch (insn_data[icode].operand[0].mode)
15409 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15410 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15411 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15412 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15413 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15414 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15415 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15416 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15417 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15418 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15419 default: gcc_unreachable ();
15422 break;
15424 default:
15425 gcc_unreachable ();
15428 gcc_assert (ftype != NULL);
15430 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15432 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15433 NULL_TREE);
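/* The name registered just above is the table entry's base name with the
   mode suffix appended: for example, an entry named "vadd" with its V8QI
   variant enabled produces a builtin called "__builtin_neon_vaddv8qi".  */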
15438 static void
15439 arm_init_builtins (void)
15441 arm_init_tls_builtins ();
15443 if (TARGET_REALLY_IWMMXT)
15444 arm_init_iwmmxt_builtins ();
15446 if (TARGET_NEON)
15447 arm_init_neon_builtins ();
15450 /* Errors in the source file can cause expand_expr to return const0_rtx
15451 where we expect a vector. To avoid crashing, use one of the vector
15452 clear instructions. */
15454 static rtx
15455 safe_vector_operand (rtx x, enum machine_mode mode)
15457 if (x != const0_rtx)
15458 return x;
15459 x = gen_reg_rtx (mode);
15461 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15462 : gen_rtx_SUBREG (DImode, x, 0)));
15463 return x;
15466 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15468 static rtx
15469 arm_expand_binop_builtin (enum insn_code icode,
15470 tree exp, rtx target)
15472 rtx pat;
15473 tree arg0 = CALL_EXPR_ARG (exp, 0);
15474 tree arg1 = CALL_EXPR_ARG (exp, 1);
15475 rtx op0 = expand_normal (arg0);
15476 rtx op1 = expand_normal (arg1);
15477 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15478 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15479 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15481 if (VECTOR_MODE_P (mode0))
15482 op0 = safe_vector_operand (op0, mode0);
15483 if (VECTOR_MODE_P (mode1))
15484 op1 = safe_vector_operand (op1, mode1);
15486 if (! target
15487 || GET_MODE (target) != tmode
15488 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15489 target = gen_reg_rtx (tmode);
15491 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15493 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15494 op0 = copy_to_mode_reg (mode0, op0);
15495 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15496 op1 = copy_to_mode_reg (mode1, op1);
15498 pat = GEN_FCN (icode) (target, op0, op1);
15499 if (! pat)
15500 return 0;
15501 emit_insn (pat);
15502 return target;
15505 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15507 static rtx
15508 arm_expand_unop_builtin (enum insn_code icode,
15509 tree exp, rtx target, int do_load)
15511 rtx pat;
15512 tree arg0 = CALL_EXPR_ARG (exp, 0);
15513 rtx op0 = expand_normal (arg0);
15514 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15515 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15517 if (! target
15518 || GET_MODE (target) != tmode
15519 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15520 target = gen_reg_rtx (tmode);
15521 if (do_load)
15522 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15523 else
15525 if (VECTOR_MODE_P (mode0))
15526 op0 = safe_vector_operand (op0, mode0);
15528 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15529 op0 = copy_to_mode_reg (mode0, op0);
15532 pat = GEN_FCN (icode) (target, op0);
15533 if (! pat)
15534 return 0;
15535 emit_insn (pat);
15536 return target;
15539 static int
15540 neon_builtin_compare (const void *a, const void *b)
15542 const neon_builtin_datum *key = a;
15543 const neon_builtin_datum *memb = b;
15544 unsigned int soughtcode = key->base_fcode;
15546 if (soughtcode >= memb->base_fcode
15547 && soughtcode < memb->base_fcode + memb->num_vars)
15548 return 0;
15549 else if (soughtcode < memb->base_fcode)
15550 return -1;
15551 else
15552 return 1;
15555 static enum insn_code
15556 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15558 neon_builtin_datum key, *found;
15559 int idx;
15561 key.base_fcode = fcode;
15562 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15563 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15564 gcc_assert (found);
15565 idx = fcode - (int) found->base_fcode;
15566 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15568 if (itype)
15569 *itype = found->itype;
15571 return found->codes[idx];
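/* The bsearch above is valid because neon_builtin_data is filled in order,
   so each entry owns the contiguous code range
   [base_fcode, base_fcode + num_vars); IDX then picks the per-mode variant
   inside that entry.  */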
15574 typedef enum {
15575 NEON_ARG_COPY_TO_REG,
15576 NEON_ARG_CONSTANT,
15577 NEON_ARG_STOP
15578 } builtin_arg;
15580 #define NEON_MAX_BUILTIN_ARGS 5
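/* arm_expand_neon_args below is variadic: after EXP it takes one
   builtin_arg code per operand, terminated by NEON_ARG_STOP.  A typical
   call, as used in arm_expand_neon_builtin further down, is:

     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
                           NEON_ARG_STOP);  */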
15582 /* Expand a Neon builtin. */
15583 static rtx
15584 arm_expand_neon_args (rtx target, int icode, int have_retval,
15585 tree exp, ...)
15587 va_list ap;
15588 rtx pat;
15589 tree arg[NEON_MAX_BUILTIN_ARGS];
15590 rtx op[NEON_MAX_BUILTIN_ARGS];
15591 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15592 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15593 int argc = 0;
15595 if (have_retval
15596 && (!target
15597 || GET_MODE (target) != tmode
15598 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15599 target = gen_reg_rtx (tmode);
15601 va_start (ap, exp);
15603 for (;;)
15605 builtin_arg thisarg = va_arg (ap, int);
15607 if (thisarg == NEON_ARG_STOP)
15608 break;
15609 else
15611 arg[argc] = CALL_EXPR_ARG (exp, argc);
15612 op[argc] = expand_normal (arg[argc]);
15613 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15615 switch (thisarg)
15617 case NEON_ARG_COPY_TO_REG:
15618 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15619 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15620 (op[argc], mode[argc]))
15621 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15622 break;
15624 case NEON_ARG_CONSTANT:
15625 /* FIXME: This error message is somewhat unhelpful. */
15626 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15627 (op[argc], mode[argc]))
15628 error ("argument must be a constant");
15629 break;
15631 case NEON_ARG_STOP:
15632 gcc_unreachable ();
15635 argc++;
15639 va_end (ap);
15641 if (have_retval)
15642 switch (argc)
15644 case 1:
15645 pat = GEN_FCN (icode) (target, op[0]);
15646 break;
15648 case 2:
15649 pat = GEN_FCN (icode) (target, op[0], op[1]);
15650 break;
15652 case 3:
15653 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15654 break;
15656 case 4:
15657 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15658 break;
15660 case 5:
15661 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15662 break;
15664 default:
15665 gcc_unreachable ();
15667 else
15668 switch (argc)
15670 case 1:
15671 pat = GEN_FCN (icode) (op[0]);
15672 break;
15674 case 2:
15675 pat = GEN_FCN (icode) (op[0], op[1]);
15676 break;
15678 case 3:
15679 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15680 break;
15682 case 4:
15683 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15684 break;
15686 case 5:
15687 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15688 break;
15690 default:
15691 gcc_unreachable ();
15694 if (!pat)
15695 return 0;
15697 emit_insn (pat);
15699 return target;
15702 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15703 constants defined per-instruction or per instruction-variant. Instead, the
15704 required info is looked up in the table neon_builtin_data. */
15705 static rtx
15706 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15708 neon_itype itype;
15709 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15711 switch (itype)
15713 case NEON_UNOP:
15714 case NEON_CONVERT:
15715 case NEON_DUPLANE:
15716 return arm_expand_neon_args (target, icode, 1, exp,
15717 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15719 case NEON_BINOP:
15720 case NEON_SETLANE:
15721 case NEON_SCALARMUL:
15722 case NEON_SCALARMULL:
15723 case NEON_SCALARMULH:
15724 case NEON_SHIFTINSERT:
15725 case NEON_LOGICBINOP:
15726 return arm_expand_neon_args (target, icode, 1, exp,
15727 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15728 NEON_ARG_STOP);
15730 case NEON_TERNOP:
15731 return arm_expand_neon_args (target, icode, 1, exp,
15732 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15733 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15735 case NEON_GETLANE:
15736 case NEON_FIXCONV:
15737 case NEON_SHIFTIMM:
15738 return arm_expand_neon_args (target, icode, 1, exp,
15739 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15740 NEON_ARG_STOP);
15742 case NEON_CREATE:
15743 return arm_expand_neon_args (target, icode, 1, exp,
15744 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15746 case NEON_DUP:
15747 case NEON_SPLIT:
15748 case NEON_REINTERP:
15749 return arm_expand_neon_args (target, icode, 1, exp,
15750 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15752 case NEON_COMBINE:
15753 case NEON_VTBL:
15754 return arm_expand_neon_args (target, icode, 1, exp,
15755 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15757 case NEON_RESULTPAIR:
15758 return arm_expand_neon_args (target, icode, 0, exp,
15759 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15760 NEON_ARG_STOP);
15762 case NEON_LANEMUL:
15763 case NEON_LANEMULL:
15764 case NEON_LANEMULH:
15765 return arm_expand_neon_args (target, icode, 1, exp,
15766 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15767 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15769 case NEON_LANEMAC:
15770 return arm_expand_neon_args (target, icode, 1, exp,
15771 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15772 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15774 case NEON_SHIFTACC:
15775 return arm_expand_neon_args (target, icode, 1, exp,
15776 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15777 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15779 case NEON_SCALARMAC:
15780 return arm_expand_neon_args (target, icode, 1, exp,
15781 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15782 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15784 case NEON_SELECT:
15785 case NEON_VTBX:
15786 return arm_expand_neon_args (target, icode, 1, exp,
15787 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15788 NEON_ARG_STOP);
15790 case NEON_LOAD1:
15791 case NEON_LOADSTRUCT:
15792 return arm_expand_neon_args (target, icode, 1, exp,
15793 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15795 case NEON_LOAD1LANE:
15796 case NEON_LOADSTRUCTLANE:
15797 return arm_expand_neon_args (target, icode, 1, exp,
15798 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15799 NEON_ARG_STOP);
15801 case NEON_STORE1:
15802 case NEON_STORESTRUCT:
15803 return arm_expand_neon_args (target, icode, 0, exp,
15804 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15806 case NEON_STORE1LANE:
15807 case NEON_STORESTRUCTLANE:
15808 return arm_expand_neon_args (target, icode, 0, exp,
15809 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15810 NEON_ARG_STOP);
15813 gcc_unreachable ();
15816 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15817 void
15818 neon_reinterpret (rtx dest, rtx src)
15820 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
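/* DEST and SRC have the same size, so the lowpart move above is a plain
   bit-for-bit copy; e.g. a V8QI value can be viewed as V4HI without any
   change to the underlying bits.  */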
15823 /* Emit code to place a Neon pair result in memory locations (with equal
15824 registers). */
15825 void
15826 neon_emit_pair_result_insn (enum machine_mode mode,
15827 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15828 rtx op1, rtx op2)
15830 rtx mem = gen_rtx_MEM (mode, destaddr);
15831 rtx tmp1 = gen_reg_rtx (mode);
15832 rtx tmp2 = gen_reg_rtx (mode);
15834 emit_insn (intfn (tmp1, op1, tmp2, op2));
15836 emit_move_insn (mem, tmp1);
15837 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15838 emit_move_insn (mem, tmp2);
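/* The first half of the pair is stored at DESTADDR and the second half
   immediately after it, at DESTADDR plus the size of MODE.  */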
15841 /* Set up operands for a register copy from src to dest, taking care not to
15842 clobber registers in the process.
15843 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15844 be called with a large N, so that should be OK. */
15846 void
15847 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15849 unsigned int copied = 0, opctr = 0;
15850 unsigned int done = (1 << count) - 1;
15851 unsigned int i, j;
15853 while (copied != done)
15855 for (i = 0; i < count; i++)
15857 int good = 1;
15859 for (j = 0; good && j < count; j++)
15860 if (i != j && (copied & (1 << j)) == 0
15861 && reg_overlap_mentioned_p (src[j], dest[i]))
15862 good = 0;
15864 if (good)
15866 operands[opctr++] = dest[i];
15867 operands[opctr++] = src[i];
15868 copied |= 1 << i;
15873 gcc_assert (opctr == count * 2);
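/* For example, with COUNT == 2, if SRC[1] overlaps DEST[0] but SRC[0] does
   not overlap DEST[1], the first pass through the loop schedules copy 1 and
   the second pass schedules copy 0, so the overlapping source is read
   before the destination that clobbers it is written.  */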
15876 /* Expand an expression EXP that calls a built-in function,
15877 with result going to TARGET if that's convenient
15878 (and in mode MODE if that's convenient).
15879 SUBTARGET may be used as the target for computing one of EXP's operands.
15880 IGNORE is nonzero if the value is to be ignored. */
15882 static rtx
15883 arm_expand_builtin (tree exp,
15884 rtx target,
15885 rtx subtarget ATTRIBUTE_UNUSED,
15886 enum machine_mode mode ATTRIBUTE_UNUSED,
15887 int ignore ATTRIBUTE_UNUSED)
15889 const struct builtin_description * d;
15890 enum insn_code icode;
15891 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15892 tree arg0;
15893 tree arg1;
15894 tree arg2;
15895 rtx op0;
15896 rtx op1;
15897 rtx op2;
15898 rtx pat;
15899 int fcode = DECL_FUNCTION_CODE (fndecl);
15900 size_t i;
15901 enum machine_mode tmode;
15902 enum machine_mode mode0;
15903 enum machine_mode mode1;
15904 enum machine_mode mode2;
15906 if (fcode >= ARM_BUILTIN_NEON_BASE)
15907 return arm_expand_neon_builtin (fcode, exp, target);
15909 switch (fcode)
15911 case ARM_BUILTIN_TEXTRMSB:
15912 case ARM_BUILTIN_TEXTRMUB:
15913 case ARM_BUILTIN_TEXTRMSH:
15914 case ARM_BUILTIN_TEXTRMUH:
15915 case ARM_BUILTIN_TEXTRMSW:
15916 case ARM_BUILTIN_TEXTRMUW:
15917 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15918 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15919 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15920 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15921 : CODE_FOR_iwmmxt_textrmw);
15923 arg0 = CALL_EXPR_ARG (exp, 0);
15924 arg1 = CALL_EXPR_ARG (exp, 1);
15925 op0 = expand_normal (arg0);
15926 op1 = expand_normal (arg1);
15927 tmode = insn_data[icode].operand[0].mode;
15928 mode0 = insn_data[icode].operand[1].mode;
15929 mode1 = insn_data[icode].operand[2].mode;
15931 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15932 op0 = copy_to_mode_reg (mode0, op0);
15933 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15935 /* @@@ better error message */
15936 error ("selector must be an immediate");
15937 return gen_reg_rtx (tmode);
15939 if (target == 0
15940 || GET_MODE (target) != tmode
15941 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15942 target = gen_reg_rtx (tmode);
15943 pat = GEN_FCN (icode) (target, op0, op1);
15944 if (! pat)
15945 return 0;
15946 emit_insn (pat);
15947 return target;
15949 case ARM_BUILTIN_TINSRB:
15950 case ARM_BUILTIN_TINSRH:
15951 case ARM_BUILTIN_TINSRW:
15952 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15953 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15954 : CODE_FOR_iwmmxt_tinsrw);
15955 arg0 = CALL_EXPR_ARG (exp, 0);
15956 arg1 = CALL_EXPR_ARG (exp, 1);
15957 arg2 = CALL_EXPR_ARG (exp, 2);
15958 op0 = expand_normal (arg0);
15959 op1 = expand_normal (arg1);
15960 op2 = expand_normal (arg2);
15961 tmode = insn_data[icode].operand[0].mode;
15962 mode0 = insn_data[icode].operand[1].mode;
15963 mode1 = insn_data[icode].operand[2].mode;
15964 mode2 = insn_data[icode].operand[3].mode;
15966 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15967 op0 = copy_to_mode_reg (mode0, op0);
15968 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15969 op1 = copy_to_mode_reg (mode1, op1);
15970 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15972 /* @@@ better error message */
15973 error ("selector must be an immediate");
15974 return const0_rtx;
15976 if (target == 0
15977 || GET_MODE (target) != tmode
15978 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15979 target = gen_reg_rtx (tmode);
15980 pat = GEN_FCN (icode) (target, op0, op1, op2);
15981 if (! pat)
15982 return 0;
15983 emit_insn (pat);
15984 return target;
15986 case ARM_BUILTIN_SETWCX:
15987 arg0 = CALL_EXPR_ARG (exp, 0);
15988 arg1 = CALL_EXPR_ARG (exp, 1);
15989 op0 = force_reg (SImode, expand_normal (arg0));
15990 op1 = expand_normal (arg1);
15991 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15992 return 0;
15994 case ARM_BUILTIN_GETWCX:
15995 arg0 = CALL_EXPR_ARG (exp, 0);
15996 op0 = expand_normal (arg0);
15997 target = gen_reg_rtx (SImode);
15998 emit_insn (gen_iwmmxt_tmrc (target, op0));
15999 return target;
16001 case ARM_BUILTIN_WSHUFH:
16002 icode = CODE_FOR_iwmmxt_wshufh;
16003 arg0 = CALL_EXPR_ARG (exp, 0);
16004 arg1 = CALL_EXPR_ARG (exp, 1);
16005 op0 = expand_normal (arg0);
16006 op1 = expand_normal (arg1);
16007 tmode = insn_data[icode].operand[0].mode;
16008 mode1 = insn_data[icode].operand[1].mode;
16009 mode2 = insn_data[icode].operand[2].mode;
16011 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16012 op0 = copy_to_mode_reg (mode1, op0);
16013 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16015 /* @@@ better error message */
16016 error ("mask must be an immediate");
16017 return const0_rtx;
16019 if (target == 0
16020 || GET_MODE (target) != tmode
16021 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16022 target = gen_reg_rtx (tmode);
16023 pat = GEN_FCN (icode) (target, op0, op1);
16024 if (! pat)
16025 return 0;
16026 emit_insn (pat);
16027 return target;
16029 case ARM_BUILTIN_WSADB:
16030 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16031 case ARM_BUILTIN_WSADH:
16032 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16033 case ARM_BUILTIN_WSADBZ:
16034 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16035 case ARM_BUILTIN_WSADHZ:
16036 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16038 /* Several three-argument builtins. */
16039 case ARM_BUILTIN_WMACS:
16040 case ARM_BUILTIN_WMACU:
16041 case ARM_BUILTIN_WALIGN:
16042 case ARM_BUILTIN_TMIA:
16043 case ARM_BUILTIN_TMIAPH:
16044 case ARM_BUILTIN_TMIATT:
16045 case ARM_BUILTIN_TMIATB:
16046 case ARM_BUILTIN_TMIABT:
16047 case ARM_BUILTIN_TMIABB:
16048 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16049 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16050 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16051 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16052 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16053 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16054 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16055 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16056 : CODE_FOR_iwmmxt_walign);
16057 arg0 = CALL_EXPR_ARG (exp, 0);
16058 arg1 = CALL_EXPR_ARG (exp, 1);
16059 arg2 = CALL_EXPR_ARG (exp, 2);
16060 op0 = expand_normal (arg0);
16061 op1 = expand_normal (arg1);
16062 op2 = expand_normal (arg2);
16063 tmode = insn_data[icode].operand[0].mode;
16064 mode0 = insn_data[icode].operand[1].mode;
16065 mode1 = insn_data[icode].operand[2].mode;
16066 mode2 = insn_data[icode].operand[3].mode;
16068 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16069 op0 = copy_to_mode_reg (mode0, op0);
16070 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16071 op1 = copy_to_mode_reg (mode1, op1);
16072 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16073 op2 = copy_to_mode_reg (mode2, op2);
16074 if (target == 0
16075 || GET_MODE (target) != tmode
16076 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16077 target = gen_reg_rtx (tmode);
16078 pat = GEN_FCN (icode) (target, op0, op1, op2);
16079 if (! pat)
16080 return 0;
16081 emit_insn (pat);
16082 return target;
16084 case ARM_BUILTIN_WZERO:
16085 target = gen_reg_rtx (DImode);
16086 emit_insn (gen_iwmmxt_clrdi (target));
16087 return target;
16089 case ARM_BUILTIN_THREAD_POINTER:
16090 return arm_load_tp (target);
16092 default:
16093 break;
16096 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16097 if (d->code == (const enum arm_builtins) fcode)
16098 return arm_expand_binop_builtin (d->icode, exp, target);
16100 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16101 if (d->code == (const enum arm_builtins) fcode)
16102 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16104 /* @@@ Should really do something sensible here. */
16105 return NULL_RTX;
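/* To recap the dispatch order above: Neon builtins (codes at or above
   ARM_BUILTIN_NEON_BASE) are expanded first, then the iWMMXt builtins that
   need special operand handling, and finally anything found in the generic
   bdesc_2arg and bdesc_1arg tables.  */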
16108 /* Return the number (counting from 0) of
16109 the least significant set bit in MASK. */
16111 inline static int
16112 number_of_first_bit_set (unsigned mask)
16114 int bit;
16116 for (bit = 0;
16117 (mask & (1 << bit)) == 0;
16118 ++bit)
16119 continue;
16121 return bit;
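/* For example, number_of_first_bit_set (0x28) returns 3, since bit 3 is the
   lowest bit set in 0b101000.  The loop never terminates for MASK == 0, so
   callers must pass a non-zero mask.  */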
16124 /* Emit code to push or pop registers to or from the stack. F is the
16125 assembly file. MASK is the registers to push or pop. PUSH is
16126 nonzero if we should push, and zero if we should pop. For debugging
16127 output, if pushing, adjust CFA_OFFSET by the amount of space added
16128 to the stack. REAL_REGS should have the same number of bits set as
16129 MASK, and will be used instead (in the same order) to describe which
16130 registers were saved - this is used to mark the save slots when we
16131 push high registers after moving them to low registers. */
16132 static void
16133 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16134 unsigned long real_regs)
16136 int regno;
16137 int lo_mask = mask & 0xFF;
16138 int pushed_words = 0;
16140 gcc_assert (mask);
16142 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16144 /* Special case. Do not generate a POP PC statement here, do it in
16145 thumb_exit().  */
16146 thumb_exit (f, -1);
16147 return;
16150 if (ARM_EABI_UNWIND_TABLES && push)
16152 fprintf (f, "\t.save\t{");
16153 for (regno = 0; regno < 15; regno++)
16155 if (real_regs & (1 << regno))
16157 if (real_regs & ((1 << regno) -1))
16158 fprintf (f, ", ");
16159 asm_fprintf (f, "%r", regno);
16162 fprintf (f, "}\n");
16165 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16167 /* Look at the low registers first. */
16168 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16170 if (lo_mask & 1)
16172 asm_fprintf (f, "%r", regno);
16174 if ((lo_mask & ~1) != 0)
16175 fprintf (f, ", ");
16177 pushed_words++;
16181 if (push && (mask & (1 << LR_REGNUM)))
16183 /* Catch pushing the LR. */
16184 if (mask & 0xFF)
16185 fprintf (f, ", ");
16187 asm_fprintf (f, "%r", LR_REGNUM);
16189 pushed_words++;
16191 else if (!push && (mask & (1 << PC_REGNUM)))
16193 /* Catch popping the PC. */
16194 if (TARGET_INTERWORK || TARGET_BACKTRACE
16195 || crtl->calls_eh_return)
16197 /* The PC is never popped directly; instead
16198 it is popped into r3 and then BX is used. */
16199 fprintf (f, "}\n");
16201 thumb_exit (f, -1);
16203 return;
16205 else
16207 if (mask & 0xFF)
16208 fprintf (f, ", ");
16210 asm_fprintf (f, "%r", PC_REGNUM);
16214 fprintf (f, "}\n");
16216 if (push && pushed_words && dwarf2out_do_frame ())
16218 char *l = dwarf2out_cfi_label ();
16219 int pushed_mask = real_regs;
16221 *cfa_offset += pushed_words * 4;
16222 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16224 pushed_words = 0;
16225 pushed_mask = real_regs;
16226 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16228 if (pushed_mask & 1)
16229 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16234 /* Generate code to return from a thumb function.
16235 If 'reg_containing_return_addr' is -1, then the return address is
16236 actually on the stack, at the stack pointer. */
16237 static void
16238 thumb_exit (FILE *f, int reg_containing_return_addr)
16240 unsigned regs_available_for_popping;
16241 unsigned regs_to_pop;
16242 int pops_needed;
16243 unsigned available;
16244 unsigned required;
16245 int mode;
16246 int size;
16247 int restore_a4 = FALSE;
16249 /* Compute the registers we need to pop. */
16250 regs_to_pop = 0;
16251 pops_needed = 0;
16253 if (reg_containing_return_addr == -1)
16255 regs_to_pop |= 1 << LR_REGNUM;
16256 ++pops_needed;
16259 if (TARGET_BACKTRACE)
16261 /* Restore the (ARM) frame pointer and stack pointer. */
16262 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16263 pops_needed += 2;
16266 /* If there is nothing to pop then just emit the BX instruction and
16267 return. */
16268 if (pops_needed == 0)
16270 if (crtl->calls_eh_return)
16271 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16273 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16274 return;
16276 /* Otherwise if we are not supporting interworking and we have not created
16277 a backtrace structure and the function was not entered in ARM mode then
16278 just pop the return address straight into the PC. */
16279 else if (!TARGET_INTERWORK
16280 && !TARGET_BACKTRACE
16281 && !is_called_in_ARM_mode (current_function_decl)
16282 && !crtl->calls_eh_return)
16284 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16285 return;
16288 /* Find out how many of the (return) argument registers we can corrupt. */
16289 regs_available_for_popping = 0;
16291 /* If returning via __builtin_eh_return, the bottom three registers
16292 all contain information needed for the return. */
16293 if (crtl->calls_eh_return)
16294 size = 12;
16295 else
16297 /* We can deduce the registers used from the function's
16298 return value. This is more reliable than examining
16299 df_regs_ever_live_p () because that will be set if the register is
16300 ever used in the function, not just if the register is used
16301 to hold a return value. */
16303 if (crtl->return_rtx != 0)
16304 mode = GET_MODE (crtl->return_rtx);
16305 else
16306 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16308 size = GET_MODE_SIZE (mode);
16310 if (size == 0)
16312 /* In a void function we can use any argument register.
16313 In a function that returns a structure on the stack
16314 we can use the second and third argument registers. */
16315 if (mode == VOIDmode)
16316 regs_available_for_popping =
16317 (1 << ARG_REGISTER (1))
16318 | (1 << ARG_REGISTER (2))
16319 | (1 << ARG_REGISTER (3));
16320 else
16321 regs_available_for_popping =
16322 (1 << ARG_REGISTER (2))
16323 | (1 << ARG_REGISTER (3));
16325 else if (size <= 4)
16326 regs_available_for_popping =
16327 (1 << ARG_REGISTER (2))
16328 | (1 << ARG_REGISTER (3));
16329 else if (size <= 8)
16330 regs_available_for_popping =
16331 (1 << ARG_REGISTER (3));
16334 /* Match registers to be popped with registers into which we pop them. */
16335 for (available = regs_available_for_popping,
16336 required = regs_to_pop;
16337 required != 0 && available != 0;
16338 available &= ~(available & - available),
16339 required &= ~(required & - required))
16340 -- pops_needed;
16342 /* If we have any popping registers left over, remove them. */
16343 if (available > 0)
16344 regs_available_for_popping &= ~available;
16346 /* Otherwise if we need another popping register we can use
16347 the fourth argument register. */
16348 else if (pops_needed)
16350 /* If we have not found any free argument registers and
16351 reg a4 contains the return address, we must move it. */
16352 if (regs_available_for_popping == 0
16353 && reg_containing_return_addr == LAST_ARG_REGNUM)
16355 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16356 reg_containing_return_addr = LR_REGNUM;
16358 else if (size > 12)
16360 /* Register a4 is being used to hold part of the return value,
16361 but we have dire need of a free, low register. */
16362 restore_a4 = TRUE;
16364 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16367 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16369 /* The fourth argument register is available. */
16370 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16372 --pops_needed;
16376 /* Pop as many registers as we can. */
16377 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16378 regs_available_for_popping);
16380 /* Process the registers we popped. */
16381 if (reg_containing_return_addr == -1)
16383 /* The return address was popped into the lowest numbered register. */
16384 regs_to_pop &= ~(1 << LR_REGNUM);
16386 reg_containing_return_addr =
16387 number_of_first_bit_set (regs_available_for_popping);
16389 /* Remove this register from the mask of available registers, so that
16390 the return address will not be corrupted by further pops. */
16391 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16394 /* If we popped other registers then handle them here. */
16395 if (regs_available_for_popping)
16397 int frame_pointer;
16399 /* Work out which register currently contains the frame pointer. */
16400 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16402 /* Move it into the correct place. */
16403 asm_fprintf (f, "\tmov\t%r, %r\n",
16404 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16406 /* (Temporarily) remove it from the mask of popped registers. */
16407 regs_available_for_popping &= ~(1 << frame_pointer);
16408 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16410 if (regs_available_for_popping)
16412 int stack_pointer;
16414 /* We popped the stack pointer as well,
16415 find the register that contains it. */
16416 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16418 /* Move it into the stack register. */
16419 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16421 /* At this point we have popped all necessary registers, so
16422 do not worry about restoring regs_available_for_popping
16423 to its correct value:
16425 assert (pops_needed == 0)
16426 assert (regs_available_for_popping == (1 << frame_pointer))
16427 assert (regs_to_pop == (1 << STACK_POINTER)) */
16429 else
16431 /* Since we have just moved the popped value into the frame
16432 pointer, the popping register is available for reuse, and
16433 we know that we still have the stack pointer left to pop. */
16434 regs_available_for_popping |= (1 << frame_pointer);
16438 /* If we still have registers left on the stack, but we no longer have
16439 any registers into which we can pop them, then we must move the return
16440 address into the link register and make available the register that
16441 contained it. */
16442 if (regs_available_for_popping == 0 && pops_needed > 0)
16444 regs_available_for_popping |= 1 << reg_containing_return_addr;
16446 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16447 reg_containing_return_addr);
16449 reg_containing_return_addr = LR_REGNUM;
16452 /* If we have registers left on the stack then pop some more.
16453 We know that at most we will want to pop FP and SP. */
16454 if (pops_needed > 0)
16456 int popped_into;
16457 int move_to;
16459 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16460 regs_available_for_popping);
16462 /* We have popped either FP or SP.
16463 Move whichever one it is into the correct register. */
16464 popped_into = number_of_first_bit_set (regs_available_for_popping);
16465 move_to = number_of_first_bit_set (regs_to_pop);
16467 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16469 regs_to_pop &= ~(1 << move_to);
16471 --pops_needed;
16474 /* If we still have not popped everything then we must have only
16475 had one register available to us and we are now popping the SP. */
16476 if (pops_needed > 0)
16478 int popped_into;
16480 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16481 regs_available_for_popping);
16483 popped_into = number_of_first_bit_set (regs_available_for_popping);
16485 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16487 assert (regs_to_pop == (1 << STACK_POINTER))
16488 assert (pops_needed == 1)
16492 /* If necessary restore the a4 register. */
16493 if (restore_a4)
16495 if (reg_containing_return_addr != LR_REGNUM)
16497 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16498 reg_containing_return_addr = LR_REGNUM;
16501 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16504 if (crtl->calls_eh_return)
16505 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16507 /* Return to caller. */
16508 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
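/* To summarise thumb_exit: when nothing needs popping it returns with a
   single BX; when no interworking, backtrace or EH return is involved it
   simply pops the return address into the PC; otherwise the return address
   (and, with TARGET_BACKTRACE, FP and SP) is popped into whatever argument
   registers are free and control returns via BX.  */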
16512 void
16513 thumb1_final_prescan_insn (rtx insn)
16515 if (flag_print_asm_name)
16516 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16517 INSN_ADDRESSES (INSN_UID (insn)));
16521 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16523 unsigned HOST_WIDE_INT mask = 0xff;
16524 int i;
16526 if (val == 0) /* XXX */
16527 return 0;
16529 for (i = 0; i < 25; i++)
16530 if ((val & (mask << i)) == val)
16531 return 1;
16533 return 0;
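/* For example, 0x00ff0000 is accepted (it is 0xff shifted left by 16),
   whereas 0x101 is rejected because its set bits do not fit within any
   single run of eight contiguous bits.  */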
16536 /* Returns nonzero if the current function contains,
16537 or might contain a far jump. */
16538 static int
16539 thumb_far_jump_used_p (void)
16541 rtx insn;
16543 /* This test is only important for leaf functions. */
16544 /* assert (!leaf_function_p ()); */
16546 /* If we have already decided that far jumps may be used,
16547 do not bother checking again, and always return true even if
16548 it turns out that they are not being used. Once we have made
16549 the decision that far jumps are present (and that hence the link
16550 register will be pushed onto the stack) we cannot go back on it. */
16551 if (cfun->machine->far_jump_used)
16552 return 1;
16554 /* If this function is not being called from the prologue/epilogue
16555 generation code then it must be being called from the
16556 INITIAL_ELIMINATION_OFFSET macro. */
16557 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16559 /* In this case we know that we are being asked about the elimination
16560 of the arg pointer register. If that register is not being used,
16561 then there are no arguments on the stack, and we do not have to
16562 worry that a far jump might force the prologue to push the link
16563 register, changing the stack offsets. In this case we can just
16564 return false, since the presence of far jumps in the function will
16565 not affect stack offsets.
16567 If the arg pointer is live (or if it was live, but has now been
16568 eliminated and so set to dead) then we do have to test to see if
16569 the function might contain a far jump. This test can lead to some
16570 false negatives, since before reload is completed, the length of
16571 branch instructions is not known, so gcc defaults to returning their
16572 longest length, which in turn sets the far jump attribute to true.
16574 A false negative will not result in bad code being generated, but it
16575 will result in a needless push and pop of the link register. We
16576 hope that this does not occur too often.
16578 If we need doubleword stack alignment this could affect the other
16579 elimination offsets so we can't risk getting it wrong. */
16580 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16581 cfun->machine->arg_pointer_live = 1;
16582 else if (!cfun->machine->arg_pointer_live)
16583 return 0;
16586 /* Check to see if the function contains a branch
16587 insn with the far jump attribute set. */
16588 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16590 if (GET_CODE (insn) == JUMP_INSN
16591 /* Ignore tablejump patterns. */
16592 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16593 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16594 && get_attr_far_jump (insn) == FAR_JUMP_YES
16597 /* Record the fact that we have decided that
16598 the function does use far jumps. */
16599 cfun->machine->far_jump_used = 1;
16600 return 1;
16604 return 0;
16607 /* Return nonzero if FUNC must be entered in ARM mode. */
16609 is_called_in_ARM_mode (tree func)
16611 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16613 /* Ignore the problem about functions whose address is taken. */
16614 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16615 return TRUE;
16617 #ifdef ARM_PE
16618 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16619 #else
16620 return FALSE;
16621 #endif
16624 /* The bits which aren't usefully expanded as rtl. */
16625 const char *
16626 thumb_unexpanded_epilogue (void)
16628 arm_stack_offsets *offsets;
16629 int regno;
16630 unsigned long live_regs_mask = 0;
16631 int high_regs_pushed = 0;
16632 int had_to_push_lr;
16633 int size;
16635 if (return_used_this_function)
16636 return "";
16638 if (IS_NAKED (arm_current_func_type ()))
16639 return "";
16641 offsets = arm_get_frame_offsets ();
16642 live_regs_mask = offsets->saved_regs_mask;
16643 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16645 /* We can deduce the registers used from the function's return value.
16646 This is more reliable than examining df_regs_ever_live_p () because that
16647 will be set if the register is ever used in the function, not just if
16648 the register is used to hold a return value. */
16649 size = arm_size_return_regs ();
16651 /* The prolog may have pushed some high registers to use as
16652 work registers. e.g. the testsuite file:
16653 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16654 compiles to produce:
16655 push {r4, r5, r6, r7, lr}
16656 mov r7, r9
16657 mov r6, r8
16658 push {r6, r7}
16659 as part of the prolog. We have to undo that pushing here. */
16661 if (high_regs_pushed)
16663 unsigned long mask = live_regs_mask & 0xff;
16664 int next_hi_reg;
16666 /* The available low registers depend on the size of the value we are
16667 returning. */
16668 if (size <= 12)
16669 mask |= 1 << 3;
16670 if (size <= 8)
16671 mask |= 1 << 2;
16673 if (mask == 0)
16674 /* Oh dear! We have no low registers into which we can pop
16675 high registers! */
16676 internal_error
16677 ("no low registers available for popping high registers");
16679 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16680 if (live_regs_mask & (1 << next_hi_reg))
16681 break;
16683 while (high_regs_pushed)
16685 /* Find lo register(s) into which the high register(s) can
16686 be popped. */
16687 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16689 if (mask & (1 << regno))
16690 high_regs_pushed--;
16691 if (high_regs_pushed == 0)
16692 break;
16695 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16697 /* Pop the values into the low register(s). */
16698 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16700 /* Move the value(s) into the high registers. */
16701 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16703 if (mask & (1 << regno))
16705 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16706 regno);
16708 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16709 if (live_regs_mask & (1 << next_hi_reg))
16710 break;
16714 live_regs_mask &= ~0x0f00;
16717 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16718 live_regs_mask &= 0xff;
16720 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
16722 /* Pop the return address into the PC. */
16723 if (had_to_push_lr)
16724 live_regs_mask |= 1 << PC_REGNUM;
16726 /* Either no argument registers were pushed or a backtrace
16727 structure was created which includes an adjusted stack
16728 pointer, so just pop everything. */
16729 if (live_regs_mask)
16730 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16731 live_regs_mask);
16733 /* We have either just popped the return address into the
16734 PC or it was kept in LR for the entire function. */
16735 if (!had_to_push_lr)
16736 thumb_exit (asm_out_file, LR_REGNUM);
16738 else
16740 /* Pop everything but the return address. */
16741 if (live_regs_mask)
16742 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16743 live_regs_mask);
16745 if (had_to_push_lr)
16747 if (size > 12)
16749 /* We have no free low regs, so save one. */
16750 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16751 LAST_ARG_REGNUM);
16754 /* Get the return address into a temporary register. */
16755 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16756 1 << LAST_ARG_REGNUM);
16758 if (size > 12)
16760 /* Move the return address to lr. */
16761 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16762 LAST_ARG_REGNUM);
16763 /* Restore the low register. */
16764 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16765 IP_REGNUM);
16766 regno = LR_REGNUM;
16768 else
16769 regno = LAST_ARG_REGNUM;
16771 else
16772 regno = LR_REGNUM;
16774 /* Remove the argument registers that were pushed onto the stack. */
16775 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16776 SP_REGNUM, SP_REGNUM,
16777 crtl->args.pretend_args_size);
16779 thumb_exit (asm_out_file, regno);
16782 return "";
16785 /* Functions to save and restore machine-specific function data. */
16786 static struct machine_function *
16787 arm_init_machine_status (void)
16789 struct machine_function *machine;
16790 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16792 #if ARM_FT_UNKNOWN != 0
16793 machine->func_type = ARM_FT_UNKNOWN;
16794 #endif
16795 return machine;
16798 /* Return an RTX indicating where the return address to the
16799 calling function can be found. */
16801 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16803 if (count != 0)
16804 return NULL_RTX;
16806 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16809 /* Do anything needed before RTL is emitted for each function. */
16810 void
16811 arm_init_expanders (void)
16813 /* Arrange to initialize and mark the machine per-function status. */
16814 init_machine_status = arm_init_machine_status;
16816 /* This is to stop the combine pass optimizing away the alignment
16817 adjustment of va_arg. */
16818 /* ??? It is claimed that this should not be necessary. */
16819 if (cfun)
16820 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16824 /* Like arm_compute_initial_elimination_offset. Simpler because there
16825 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16826 to point at the base of the local variables after static stack
16827 space for a function has been allocated. */
16829 HOST_WIDE_INT
16830 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16832 arm_stack_offsets *offsets;
16834 offsets = arm_get_frame_offsets ();
16836 switch (from)
16838 case ARG_POINTER_REGNUM:
16839 switch (to)
16841 case STACK_POINTER_REGNUM:
16842 return offsets->outgoing_args - offsets->saved_args;
16844 case FRAME_POINTER_REGNUM:
16845 return offsets->soft_frame - offsets->saved_args;
16847 case ARM_HARD_FRAME_POINTER_REGNUM:
16848 return offsets->saved_regs - offsets->saved_args;
16850 case THUMB_HARD_FRAME_POINTER_REGNUM:
16851 return offsets->locals_base - offsets->saved_args;
16853 default:
16854 gcc_unreachable ();
16856 break;
16858 case FRAME_POINTER_REGNUM:
16859 switch (to)
16861 case STACK_POINTER_REGNUM:
16862 return offsets->outgoing_args - offsets->soft_frame;
16864 case ARM_HARD_FRAME_POINTER_REGNUM:
16865 return offsets->saved_regs - offsets->soft_frame;
16867 case THUMB_HARD_FRAME_POINTER_REGNUM:
16868 return offsets->locals_base - offsets->soft_frame;
16870 default:
16871 gcc_unreachable ();
16873 break;
16875 default:
16876 gcc_unreachable ();
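/* For instance, eliminating ARG_POINTER_REGNUM into STACK_POINTER_REGNUM
   uses offsets->outgoing_args - offsets->saved_args, i.e. the distance
   between the incoming argument area and the stack pointer once the local
   frame has been allocated.  */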
16880 /* Generate the rest of a function's prologue. */
16881 void
16882 thumb1_expand_prologue (void)
16884 rtx insn, dwarf;
16886 HOST_WIDE_INT amount;
16887 arm_stack_offsets *offsets;
16888 unsigned long func_type;
16889 int regno;
16890 unsigned long live_regs_mask;
16892 func_type = arm_current_func_type ();
16894 /* Naked functions don't have prologues. */
16895 if (IS_NAKED (func_type))
16896 return;
16898 if (IS_INTERRUPT (func_type))
16900 error ("interrupt Service Routines cannot be coded in Thumb mode");
16901 return;
16904 offsets = arm_get_frame_offsets ();
16905 live_regs_mask = offsets->saved_regs_mask;
16906 /* Load the pic register before setting the frame pointer,
16907 so we can use r7 as a temporary work register. */
16908 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16909 arm_load_pic_register (live_regs_mask);
16911 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16912 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16913 stack_pointer_rtx);
16915 amount = offsets->outgoing_args - offsets->saved_regs;
16916 if (amount)
16918 if (amount < 512)
16920 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16921 GEN_INT (- amount)));
16922 RTX_FRAME_RELATED_P (insn) = 1;
16924 else
16926 rtx reg;
16928 /* The stack decrement is too big for an immediate value in a single
16929 insn. In theory we could issue multiple subtracts, but after
16930 three of them it becomes more space efficient to place the full
16931 value in the constant pool and load into a register. (Also the
16932 ARM debugger really likes to see only one stack decrement per
16933 function). So instead we look for a scratch register into which
16934 we can load the decrement, and then we subtract this from the
16935 stack pointer. Unfortunately on the thumb the only available
16936 scratch registers are the argument registers, and we cannot use
16937 these as they may hold arguments to the function. Instead we
16938 attempt to locate a call preserved register which is used by this
16939 function. If we can find one, then we know that it will have
16940 been pushed at the start of the prologue and so we can corrupt
16941 it now. */
16942 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16943 if (live_regs_mask & (1 << regno)
16944 && !(frame_pointer_needed
16945 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16946 break;
16948 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16950 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16952 /* Choose an arbitrary, non-argument low register. */
16953 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16955 /* Save it by copying it into a high, scratch register. */
16956 emit_insn (gen_movsi (spare, reg));
16957 /* Add a USE to stop propagate_one_insn() from barfing. */
16958 emit_insn (gen_prologue_use (spare));
16960 /* Decrement the stack. */
16961 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16962 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16963 stack_pointer_rtx, reg));
16964 RTX_FRAME_RELATED_P (insn) = 1;
16965 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16966 plus_constant (stack_pointer_rtx,
16967 -amount));
16968 RTX_FRAME_RELATED_P (dwarf) = 1;
16969 REG_NOTES (insn)
16970 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16971 REG_NOTES (insn));
16973 /* Restore the low register's original value. */
16974 emit_insn (gen_movsi (reg, spare));
16976 /* Emit a USE of the restored scratch register, so that flow
16977 analysis will not consider the restore redundant. The
16978 register won't be used again in this function and isn't
16979 restored by the epilogue. */
16980 emit_insn (gen_prologue_use (reg));
16982 else
16984 reg = gen_rtx_REG (SImode, regno);
16986 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16988 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16989 stack_pointer_rtx, reg));
16990 RTX_FRAME_RELATED_P (insn) = 1;
16991 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16992 plus_constant (stack_pointer_rtx,
16993 -amount));
16994 RTX_FRAME_RELATED_P (dwarf) = 1;
16995 REG_NOTES (insn)
16996 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16997 REG_NOTES (insn));
17002 if (frame_pointer_needed)
17003 thumb_set_frame_pointer (offsets);
17005 /* If we are profiling, make sure no instructions are scheduled before
17006 the call to mcount. Similarly if the user has requested no
17007 scheduling in the prolog. Similarly if we want non-call exceptions
17008 using the EABI unwinder, to prevent faulting instructions from being
17009 swapped with a stack adjustment. */
17010 if (crtl->profile || !TARGET_SCHED_PROLOG
17011 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17012 emit_insn (gen_blockage ());
17014 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17015 if (live_regs_mask & 0xff)
17016 cfun->machine->lr_save_eliminated = 0;
17020 void
17021 thumb1_expand_epilogue (void)
17023 HOST_WIDE_INT amount;
17024 arm_stack_offsets *offsets;
17025 int regno;
17027 /* Naked functions don't have epilogues. */
17028 if (IS_NAKED (arm_current_func_type ()))
17029 return;
17031 offsets = arm_get_frame_offsets ();
17032 amount = offsets->outgoing_args - offsets->saved_regs;
17034 if (frame_pointer_needed)
17036 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17037 amount = offsets->locals_base - offsets->saved_regs;
17040 gcc_assert (amount >= 0);
17041 if (amount)
17043 if (amount < 512)
17044 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17045 GEN_INT (amount)));
17046 else
17048 /* r3 is always free in the epilogue. */
17049 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17051 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17052 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17056 /* Emit a USE (stack_pointer_rtx), so that
17057 the stack adjustment will not be deleted. */
17058 emit_insn (gen_prologue_use (stack_pointer_rtx));
17060 if (crtl->profile || !TARGET_SCHED_PROLOG)
17061 emit_insn (gen_blockage ());
17063 /* Emit a clobber for each register that will be restored in the epilogue,
17064 so that flow2 will get register lifetimes correct. */
17065 for (regno = 0; regno < 13; regno++)
17066 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17067 emit_clobber (gen_rtx_REG (SImode, regno));
17069 if (! df_regs_ever_live_p (LR_REGNUM))
17070 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
17073 static void
17074 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17076 arm_stack_offsets *offsets;
17077 unsigned long live_regs_mask = 0;
17078 unsigned long l_mask;
17079 unsigned high_regs_pushed = 0;
17080 int cfa_offset = 0;
17081 int regno;
17083 if (IS_NAKED (arm_current_func_type ()))
17084 return;
17086 if (is_called_in_ARM_mode (current_function_decl))
17088 const char * name;
17090 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17091 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17092 == SYMBOL_REF);
17093 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17095 /* Generate code sequence to switch us into Thumb mode. */
17096 /* The .code 32 directive has already been emitted by
17097 ASM_DECLARE_FUNCTION_NAME. */
17098 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17099 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17101 /* Generate a label, so that the debugger will notice the
17102 change in instruction sets. This label is also used by
17103 the assembler to bypass the ARM code when this function
17104 is called from a Thumb encoded function elsewhere in the
17105 same file. Hence the definition of STUB_NAME here must
17106 agree with the definition in gas/config/tc-arm.c. */
17108 #define STUB_NAME ".real_start_of"
17110 fprintf (f, "\t.code\t16\n");
17111 #ifdef ARM_PE
17112 if (arm_dllexport_name_p (name))
17113 name = arm_strip_name_encoding (name);
17114 #endif
17115 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17116 fprintf (f, "\t.thumb_func\n");
17117 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17120 if (crtl->args.pretend_args_size)
17122 /* Output unwind directive for the stack adjustment. */
17123 if (ARM_EABI_UNWIND_TABLES)
17124 fprintf (f, "\t.pad #%d\n",
17125 crtl->args.pretend_args_size);
17127 if (cfun->machine->uses_anonymous_args)
17129 int num_pushes;
17131 fprintf (f, "\tpush\t{");
17133 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17135 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17136 regno <= LAST_ARG_REGNUM;
17137 regno++)
17138 asm_fprintf (f, "%r%s", regno,
17139 regno == LAST_ARG_REGNUM ? "" : ", ");
17141 fprintf (f, "}\n");
17143 else
17144 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17145 SP_REGNUM, SP_REGNUM,
17146 crtl->args.pretend_args_size);
17148 /* We don't need to record the stores for unwinding (would it
17149 help the debugger any if we did?), but record the change in
17150 the stack pointer. */
17151 if (dwarf2out_do_frame ())
17153 char *l = dwarf2out_cfi_label ();
17155 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17156 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17160 /* Get the registers we are going to push. */
17161 offsets = arm_get_frame_offsets ();
17162 live_regs_mask = offsets->saved_regs_mask;
17163 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17164 l_mask = live_regs_mask & 0x40ff;
17165 /* Then count how many other high registers will need to be pushed. */
17166 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17168 if (TARGET_BACKTRACE)
17170 unsigned offset;
17171 unsigned work_register;
17173 /* We have been asked to create a stack backtrace structure.
17174 The code looks like this:
17176 0 .align 2
17177 0 func:
17178 0 sub SP, #16 Reserve space for 4 registers.
17179 2 push {R7} Push low registers.
17180 4 add R7, SP, #20 Get the stack pointer before the push.
17181 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17182 8 mov R7, PC Get hold of the start of this code plus 12.
17183 10 str R7, [SP, #16] Store it.
17184 12 mov R7, FP Get hold of the current frame pointer.
17185 14 str R7, [SP, #4] Store it.
17186 16 mov R7, LR Get hold of the current return address.
17187 18 str R7, [SP, #12] Store it.
17188 20 add R7, SP, #16 Point at the start of the backtrace structure.
17189 22 mov FP, R7 Put this value into the frame pointer. */
17191 work_register = thumb_find_work_register (live_regs_mask);
17193 if (ARM_EABI_UNWIND_TABLES)
17194 asm_fprintf (f, "\t.pad #16\n");
17196 asm_fprintf
17197 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17198 SP_REGNUM, SP_REGNUM);
17200 if (dwarf2out_do_frame ())
17202 char *l = dwarf2out_cfi_label ();
17204 cfa_offset = cfa_offset + 16;
17205 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17208 if (l_mask)
17210 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17211 offset = bit_count (l_mask) * UNITS_PER_WORD;
17213 else
17214 offset = 0;
17216 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17217 offset + 16 + crtl->args.pretend_args_size);
17219 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17220 offset + 4);
17222 /* Make sure that the instruction fetching the PC is in the right place
17223 to calculate "start of backtrace creation code + 12". */
17224 if (l_mask)
17226 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17227 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17228 offset + 12);
17229 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17230 ARM_HARD_FRAME_POINTER_REGNUM);
17231 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17232 offset);
17234 else
17236 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17237 ARM_HARD_FRAME_POINTER_REGNUM);
17238 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17239 offset);
17240 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17241 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17242 offset + 12);
17245 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17246 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17247 offset + 8);
17248 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17249 offset + 12);
17250 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17251 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17253 /* Optimization: If we are not pushing any low registers but we are going
17254 to push some high registers then delay our first push. This will just
17255 be a push of LR and we can combine it with the push of the first high
17256 register. */
17257 else if ((l_mask & 0xff) != 0
17258 || (high_regs_pushed == 0 && l_mask))
17259 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17261 if (high_regs_pushed)
17263 unsigned pushable_regs;
17264 unsigned next_hi_reg;
17266 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17267 if (live_regs_mask & (1 << next_hi_reg))
17268 break;
17270 pushable_regs = l_mask & 0xff;
17272 if (pushable_regs == 0)
17273 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
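          /* Thumb-1 push cannot encode the high registers (r8-r11), so copy
             each live high register into an available low register and push
             that instead.  */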
17275 while (high_regs_pushed > 0)
17277 unsigned long real_regs_mask = 0;
17279 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17281 if (pushable_regs & (1 << regno))
17283 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17285 high_regs_pushed --;
17286 real_regs_mask |= (1 << next_hi_reg);
17288 if (high_regs_pushed)
17290 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17291 next_hi_reg --)
17292 if (live_regs_mask & (1 << next_hi_reg))
17293 break;
17295 else
17297 pushable_regs &= ~((1 << regno) - 1);
17298 break;
17303 /* If we had to find a work register and we have not yet
17304 saved the LR then add it to the list of regs to push. */
17305 if (l_mask == (1 << LR_REGNUM))
17307 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17308 1, &cfa_offset,
17309 real_regs_mask | (1 << LR_REGNUM));
17310 l_mask = 0;
17312 else
17313 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17318 /* Handle the case of a double word load into a low register from
17319 a computed memory address. The computed address may involve a
17320 register which is overwritten by the load. */
17321 const char *
17322 thumb_load_double_from_address (rtx *operands)
17324 rtx addr;
17325 rtx base;
17326 rtx offset;
17327 rtx arg1;
17328 rtx arg2;
17330 gcc_assert (GET_CODE (operands[0]) == REG);
17331 gcc_assert (GET_CODE (operands[1]) == MEM);
17333 /* Get the memory address. */
17334 addr = XEXP (operands[1], 0);
17336 /* Work out how the memory address is computed. */
17337 switch (GET_CODE (addr))
17339 case REG:
17340 operands[2] = adjust_address (operands[1], SImode, 4);
17342 if (REGNO (operands[0]) == REGNO (addr))
17344 output_asm_insn ("ldr\t%H0, %2", operands);
17345 output_asm_insn ("ldr\t%0, %1", operands);
17347 else
17349 output_asm_insn ("ldr\t%0, %1", operands);
17350 output_asm_insn ("ldr\t%H0, %2", operands);
17352 break;
17354 case CONST:
17355 /* Compute <address> + 4 for the high order load. */
17356 operands[2] = adjust_address (operands[1], SImode, 4);
17358 output_asm_insn ("ldr\t%0, %1", operands);
17359 output_asm_insn ("ldr\t%H0, %2", operands);
17360 break;
17362 case PLUS:
17363 arg1 = XEXP (addr, 0);
17364 arg2 = XEXP (addr, 1);
17366 if (CONSTANT_P (arg1))
17367 base = arg2, offset = arg1;
17368 else
17369 base = arg1, offset = arg2;
17371 gcc_assert (GET_CODE (base) == REG);
17373 /* Catch the case of <address> = <reg> + <reg> */
17374 if (GET_CODE (offset) == REG)
17376 int reg_offset = REGNO (offset);
17377 int reg_base = REGNO (base);
17378 int reg_dest = REGNO (operands[0]);
17380 /* Add the base and offset registers together into the
17381 higher destination register. */
17382 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17383 reg_dest + 1, reg_base, reg_offset);
17385 /* Load the lower destination register from the address in
17386 the higher destination register. */
17387 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17388 reg_dest, reg_dest + 1);
17390 /* Load the higher destination register from its own address
17391 plus 4. */
17392 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17393 reg_dest + 1, reg_dest + 1);
17395 else
17397 /* Compute <address> + 4 for the high order load. */
17398 operands[2] = adjust_address (operands[1], SImode, 4);
17400 /* If the computed address is held in the low order register
17401 then load the high order register first, otherwise always
17402 load the low order register first. */
17403 if (REGNO (operands[0]) == REGNO (base))
17405 output_asm_insn ("ldr\t%H0, %2", operands);
17406 output_asm_insn ("ldr\t%0, %1", operands);
17408 else
17410 output_asm_insn ("ldr\t%0, %1", operands);
17411 output_asm_insn ("ldr\t%H0, %2", operands);
17414 break;
17416 case LABEL_REF:
17417 /* With no registers to worry about we can just load the value
17418 directly. */
17419 operands[2] = adjust_address (operands[1], SImode, 4);
17421 output_asm_insn ("ldr\t%H0, %2", operands);
17422 output_asm_insn ("ldr\t%0, %1", operands);
17423 break;
17425 default:
17426 gcc_unreachable ();
17429 return "";
17432 const char *
17433 thumb_output_move_mem_multiple (int n, rtx *operands)
17435 rtx tmp;
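  /* ldmia/stmia transfer their register list in ascending register order,
     so sort the scratch registers into that order before emitting them.  */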
17437 switch (n)
17439 case 2:
17440 if (REGNO (operands[4]) > REGNO (operands[5]))
17442 tmp = operands[4];
17443 operands[4] = operands[5];
17444 operands[5] = tmp;
17446 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17447 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17448 break;
17450 case 3:
17451 if (REGNO (operands[4]) > REGNO (operands[5]))
17453 tmp = operands[4];
17454 operands[4] = operands[5];
17455 operands[5] = tmp;
17457 if (REGNO (operands[5]) > REGNO (operands[6]))
17459 tmp = operands[5];
17460 operands[5] = operands[6];
17461 operands[6] = tmp;
17463 if (REGNO (operands[4]) > REGNO (operands[5]))
17465 tmp = operands[4];
17466 operands[4] = operands[5];
17467 operands[5] = tmp;
17470 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17471 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17472 break;
17474 default:
17475 gcc_unreachable ();
17478 return "";
17481 /* Output a call-via instruction for thumb state. */
17482 const char *
17483 thumb_call_via_reg (rtx reg)
17485 int regno = REGNO (reg);
17486 rtx *labelp;
17488 gcc_assert (regno < LR_REGNUM);
17490 /* If we are in the normal text section we can use a single instance
17491 per compilation unit. If we are doing function sections, then we need
17492 an entry per section, since we can't rely on reachability. */
17493 if (in_section == text_section)
17495 thumb_call_reg_needed = 1;
17497 if (thumb_call_via_label[regno] == NULL)
17498 thumb_call_via_label[regno] = gen_label_rtx ();
17499 labelp = thumb_call_via_label + regno;
17501 else
17503 if (cfun->machine->call_via[regno] == NULL)
17504 cfun->machine->call_via[regno] = gen_label_rtx ();
17505 labelp = cfun->machine->call_via + regno;
17508 output_asm_insn ("bl\t%a0", labelp);
17509 return "";
17512 /* Routines for generating rtl. */
17513 void
17514 thumb_expand_movmemqi (rtx *operands)
17516 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17517 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17518 HOST_WIDE_INT len = INTVAL (operands[2]);
17519 HOST_WIDE_INT offset = 0;
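  /* Copy the largest chunks first: 12- and 8-byte blocks via the
     load/store-multiple patterns, then a word, a halfword and a final
     byte as needed.  */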
17521 while (len >= 12)
17523 emit_insn (gen_movmem12b (out, in, out, in));
17524 len -= 12;
17527 if (len >= 8)
17529 emit_insn (gen_movmem8b (out, in, out, in));
17530 len -= 8;
17533 if (len >= 4)
17535 rtx reg = gen_reg_rtx (SImode);
17536 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17537 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17538 len -= 4;
17539 offset += 4;
17542 if (len >= 2)
17544 rtx reg = gen_reg_rtx (HImode);
17545 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17546 plus_constant (in, offset))));
17547 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17548 reg));
17549 len -= 2;
17550 offset += 2;
17553 if (len)
17555 rtx reg = gen_reg_rtx (QImode);
17556 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17557 plus_constant (in, offset))));
17558 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17559 reg));
17563 void
17564 thumb_reload_out_hi (rtx *operands)
17566 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17569 /* Handle reading a half-word from memory during reload. */
17570 void
17571 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17573 gcc_unreachable ();
17576 /* Return the length of a function name prefix
17577 that starts with the character 'c'. */
17578 static int
17579 arm_get_strip_length (int c)
17581 switch (c)
17583 ARM_NAME_ENCODING_LENGTHS
17584 default: return 0;
17588 /* Return a pointer to a function's name with any
17589 and all prefix encodings stripped from it. */
17590 const char *
17591 arm_strip_name_encoding (const char *name)
17593 int skip;
17595 while ((skip = arm_get_strip_length (* name)))
17596 name += skip;
17598 return name;
17601 /* If there is a '*' anywhere in the name's prefix, then
17602 emit the stripped name verbatim, otherwise prepend an
17603 underscore if leading underscores are being used. */
17604 void
17605 arm_asm_output_labelref (FILE *stream, const char *name)
17607 int skip;
17608 int verbatim = 0;
17610 while ((skip = arm_get_strip_length (* name)))
17612 verbatim |= (*name == '*');
17613 name += skip;
17616 if (verbatim)
17617 fputs (name, stream);
17618 else
17619 asm_fprintf (stream, "%U%s", name);
17622 static void
17623 arm_file_start (void)
17625 int val;
17627 if (TARGET_UNIFIED_ASM)
17628 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17630 if (TARGET_BPABI)
17632 const char *fpu_name;
17633 if (arm_select[0].string)
17634 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17635 else if (arm_select[1].string)
17636 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17637 else
17638 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17639 all_cores[arm_default_cpu].name);
17641 if (TARGET_SOFT_FLOAT)
17643 if (TARGET_VFP)
17644 fpu_name = "softvfp";
17645 else
17646 fpu_name = "softfpa";
17648 else
17650 int set_float_abi_attributes = 0;
17651 switch (arm_fpu_arch)
17653 case FPUTYPE_FPA:
17654 fpu_name = "fpa";
17655 break;
17656 case FPUTYPE_FPA_EMU2:
17657 fpu_name = "fpe2";
17658 break;
17659 case FPUTYPE_FPA_EMU3:
17660 fpu_name = "fpe3";
17661 break;
17662 case FPUTYPE_MAVERICK:
17663 fpu_name = "maverick";
17664 break;
17665 case FPUTYPE_VFP:
17666 fpu_name = "vfp";
17667 set_float_abi_attributes = 1;
17668 break;
17669 case FPUTYPE_VFP3:
17670 fpu_name = "vfp3";
17671 set_float_abi_attributes = 1;
17672 break;
17673 case FPUTYPE_NEON:
17674 fpu_name = "neon";
17675 set_float_abi_attributes = 1;
17676 break;
17677 default:
17678 abort();
17680 if (set_float_abi_attributes)
17682 if (TARGET_HARD_FLOAT)
17683 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17684 if (TARGET_HARD_FLOAT_ABI)
17685 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17688 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17690 /* Some of these attributes only apply when the corresponding features
17692 are used. However, we don't have any easy way of figuring this out.
17692 Conservatively record the setting that would have been used. */
17694 /* Tag_ABI_FP_rounding. */
17695 if (flag_rounding_math)
17696 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17697 if (!flag_unsafe_math_optimizations)
17699 /* Tag_ABI_FP_denormal. */
17700 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17701 /* Tag_ABI_FP_exceptions. */
17702 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17704 /* Tag_ABI_FP_user_exceptions. */
17705 if (flag_signaling_nans)
17706 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17707 /* Tag_ABI_FP_number_model. */
17708 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17709 flag_finite_math_only ? 1 : 3);
17711 /* Tag_ABI_align8_needed. */
17712 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17713 /* Tag_ABI_align8_preserved. */
17714 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17715 /* Tag_ABI_enum_size. */
17716 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17717 flag_short_enums ? 1 : 2);
17719 /* Tag_ABI_optimization_goals. */
17720 if (optimize_size)
17721 val = 4;
17722 else if (optimize >= 2)
17723 val = 2;
17724 else if (optimize)
17725 val = 1;
17726 else
17727 val = 6;
17728 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17730 if (arm_lang_output_object_attributes_hook)
17731 arm_lang_output_object_attributes_hook();
17733 default_file_start();
17736 static void
17737 arm_file_end (void)
17739 int regno;
17741 if (NEED_INDICATE_EXEC_STACK)
17742 /* Add .note.GNU-stack. */
17743 file_end_indicate_exec_stack ();
17745 if (! thumb_call_reg_needed)
17746 return;
17748 switch_to_section (text_section);
17749 asm_fprintf (asm_out_file, "\t.code 16\n");
17750 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17752 for (regno = 0; regno < LR_REGNUM; regno++)
17754 rtx label = thumb_call_via_label[regno];
17756 if (label != 0)
17758 targetm.asm_out.internal_label (asm_out_file, "L",
17759 CODE_LABEL_NUMBER (label));
17760 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17765 #ifndef ARM_PE
17766 /* Symbols in the text segment can be accessed without indirecting via the
17767 constant pool; it may take an extra binary operation, but this is still
17768 faster than indirecting via memory. Don't do this when not optimizing,
17769 since we won't be calculating all of the offsets necessary to do this
17770 simplification. */
17772 static void
17773 arm_encode_section_info (tree decl, rtx rtl, int first)
17775 if (optimize > 0 && TREE_CONSTANT (decl))
17776 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17778 default_encode_section_info (decl, rtl, first);
17780 #endif /* !ARM_PE */
17782 static void
17783 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17785 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17786 && !strcmp (prefix, "L"))
17788 arm_ccfsm_state = 0;
17789 arm_target_insn = NULL;
17791 default_internal_label (stream, prefix, labelno);
17794 /* Output code to add DELTA to the first argument, and then jump
17795 to FUNCTION. Used for C++ multiple inheritance. */
17796 static void
17797 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17798 HOST_WIDE_INT delta,
17799 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17800 tree function)
17802 static int thunk_label = 0;
17803 char label[256];
17804 char labelpc[256];
17805 int mi_delta = delta;
17806 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17807 int shift = 0;
17808 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17809 ? 1 : 0);
17810 if (mi_delta < 0)
17811 mi_delta = - mi_delta;
17813 if (TARGET_THUMB1)
17815 int labelno = thunk_label++;
17816 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17817 /* Thunks are entered in ARM mode when available. */
17818 if (TARGET_THUMB1_ONLY)
17820 /* push r3 so we can use it as a temporary. */
17821 /* TODO: Omit this save if r3 is not used. */
17822 fputs ("\tpush {r3}\n", file);
17823 fputs ("\tldr\tr3, ", file);
17825 else
17827 fputs ("\tldr\tr12, ", file);
17829 assemble_name (file, label);
17830 fputc ('\n', file);
17831 if (flag_pic)
17833 /* If we are generating PIC, the ldr instruction below loads
17834 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17835 the address of the add + 8, so we have:
17837 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17838 = target + 1.
17840 Note that we have "+ 1" because some versions of GNU ld
17841 don't set the low bit of the result for R_ARM_REL32
17842 relocations against thumb function symbols.
17843 On ARMv6M this is +4, not +8. */
17844 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17845 assemble_name (file, labelpc);
17846 fputs (":\n", file);
17847 if (TARGET_THUMB1_ONLY)
17849 /* This is 2 insns after the start of the thunk, so we know it
17850 is 4-byte aligned. */
17851 fputs ("\tadd\tr3, pc, r3\n", file);
17852 fputs ("\tmov r12, r3\n", file);
17854 else
17855 fputs ("\tadd\tr12, pc, r12\n", file);
17857 else if (TARGET_THUMB1_ONLY)
17858 fputs ("\tmov r12, r3\n", file);
17860 if (TARGET_THUMB1_ONLY)
17862 if (mi_delta > 255)
17864 fputs ("\tldr\tr3, ", file);
17865 assemble_name (file, label);
17866 fputs ("+4\n", file);
17867 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
17868 mi_op, this_regno, this_regno);
17870 else if (mi_delta != 0)
17872 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17873 mi_op, this_regno, this_regno,
17874 mi_delta);
17877 else
17879 /* TODO: Use movw/movt for large constants when available. */
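          /* Add the delta eight bits at a time, starting at an even bit
             position, so that each piece is a valid ARM immediate operand.  */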
17880 while (mi_delta != 0)
17882 if ((mi_delta & (3 << shift)) == 0)
17883 shift += 2;
17884 else
17886 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17887 mi_op, this_regno, this_regno,
17888 mi_delta & (0xff << shift));
17889 mi_delta &= ~(0xff << shift);
17890 shift += 8;
17894 if (TARGET_THUMB1)
17896 if (TARGET_THUMB1_ONLY)
17897 fputs ("\tpop\t{r3}\n", file);
17899 fprintf (file, "\tbx\tr12\n");
17900 ASM_OUTPUT_ALIGN (file, 2);
17901 assemble_name (file, label);
17902 fputs (":\n", file);
17903 if (flag_pic)
17905 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17906 rtx tem = XEXP (DECL_RTL (function), 0);
17907 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17908 tem = gen_rtx_MINUS (GET_MODE (tem),
17909 tem,
17910 gen_rtx_SYMBOL_REF (Pmode,
17911 ggc_strdup (labelpc)));
17912 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17914 else
17915 /* Output ".word .LTHUNKn". */
17916 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17918 if (TARGET_THUMB1_ONLY && mi_delta > 255)
17919 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
17921 else
17923 fputs ("\tb\t", file);
17924 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17925 if (NEED_PLT_RELOC)
17926 fputs ("(PLT)", file);
17927 fputc ('\n', file);
17932 arm_emit_vector_const (FILE *file, rtx x)
17934 int i;
17935 const char * pattern;
17937 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17939 switch (GET_MODE (x))
17941 case V2SImode: pattern = "%08x"; break;
17942 case V4HImode: pattern = "%04x"; break;
17943 case V8QImode: pattern = "%02x"; break;
17944 default: gcc_unreachable ();
17947 fprintf (file, "0x");
17948 for (i = CONST_VECTOR_NUNITS (x); i--;)
17950 rtx element;
17952 element = CONST_VECTOR_ELT (x, i);
17953 fprintf (file, pattern, INTVAL (element));
17956 return 1;
17959 const char *
17960 arm_output_load_gr (rtx *operands)
17962 rtx reg;
17963 rtx offset;
17964 rtx wcgr;
17965 rtx sum;
17967 if (GET_CODE (operands [1]) != MEM
17968 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17969 || GET_CODE (reg = XEXP (sum, 0)) != REG
17970 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17971 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17972 return "wldrw%?\t%0, %1";
17974 /* Fix up an out-of-range load of a GR register. */
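  /* Push the base register to free it up as a temporary, load the value
     into it with a plain ldr, transfer it to the wCGR register with tmcr,
     then pop the base register back.  */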
17975 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17976 wcgr = operands[0];
17977 operands[0] = reg;
17978 output_asm_insn ("ldr%?\t%0, %1", operands);
17980 operands[0] = wcgr;
17981 operands[1] = reg;
17982 output_asm_insn ("tmcr%?\t%0, %1", operands);
17983 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
17985 return "";
17988 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
17990 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
17991 named arg and all anonymous args onto the stack.
17992 XXX I know the prologue shouldn't be pushing registers, but it is faster
17993 that way. */
17995 static void
17996 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
17997 enum machine_mode mode,
17998 tree type,
17999 int *pretend_size,
18000 int second_time ATTRIBUTE_UNUSED)
18002 int nregs = cum->nregs;
18003 if (nregs & 1
18004 && ARM_DOUBLEWORD_ALIGN
18005 && arm_needs_doubleword_align (mode, type))
18006 nregs++;
18008 cfun->machine->uses_anonymous_args = 1;
18009 if (nregs < NUM_ARG_REGS)
18010 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
18013 /* Return nonzero if the CONSUMER instruction (a store) does not need
18014 PRODUCER's value to calculate the address. */
18017 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18019 rtx value = PATTERN (producer);
18020 rtx addr = PATTERN (consumer);
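  /* Strip any conditional execution wrapper and, for a PARALLEL, look only
     at the first element, so that the checks below see the underlying SET.  */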
18022 if (GET_CODE (value) == COND_EXEC)
18023 value = COND_EXEC_CODE (value);
18024 if (GET_CODE (value) == PARALLEL)
18025 value = XVECEXP (value, 0, 0);
18026 value = XEXP (value, 0);
18027 if (GET_CODE (addr) == COND_EXEC)
18028 addr = COND_EXEC_CODE (addr);
18029 if (GET_CODE (addr) == PARALLEL)
18030 addr = XVECEXP (addr, 0, 0);
18031 addr = XEXP (addr, 0);
18033 return !reg_overlap_mentioned_p (value, addr);
18036 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18037 have an early register shift value or amount dependency on the
18038 result of PRODUCER. */
18041 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18043 rtx value = PATTERN (producer);
18044 rtx op = PATTERN (consumer);
18045 rtx early_op;
18047 if (GET_CODE (value) == COND_EXEC)
18048 value = COND_EXEC_CODE (value);
18049 if (GET_CODE (value) == PARALLEL)
18050 value = XVECEXP (value, 0, 0);
18051 value = XEXP (value, 0);
18052 if (GET_CODE (op) == COND_EXEC)
18053 op = COND_EXEC_CODE (op);
18054 if (GET_CODE (op) == PARALLEL)
18055 op = XVECEXP (op, 0, 0);
18056 op = XEXP (op, 1);
18058 early_op = XEXP (op, 0);
18059 /* This is either an actual independent shift, or a shift applied to
18060 the first operand of another operation. We want the whole shift
18061 operation. */
18062 if (GET_CODE (early_op) == REG)
18063 early_op = op;
18065 return !reg_overlap_mentioned_p (value, early_op);
18068 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18069 have an early register shift value dependency on the result of
18070 PRODUCER. */
18073 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18075 rtx value = PATTERN (producer);
18076 rtx op = PATTERN (consumer);
18077 rtx early_op;
18079 if (GET_CODE (value) == COND_EXEC)
18080 value = COND_EXEC_CODE (value);
18081 if (GET_CODE (value) == PARALLEL)
18082 value = XVECEXP (value, 0, 0);
18083 value = XEXP (value, 0);
18084 if (GET_CODE (op) == COND_EXEC)
18085 op = COND_EXEC_CODE (op);
18086 if (GET_CODE (op) == PARALLEL)
18087 op = XVECEXP (op, 0, 0);
18088 op = XEXP (op, 1);
18090 early_op = XEXP (op, 0);
18092 /* This is either an actual independent shift, or a shift applied to
18093 the first operand of another operation. We want the value being
18094 shifted, in either case. */
18095 if (GET_CODE (early_op) != REG)
18096 early_op = XEXP (early_op, 0);
18098 return !reg_overlap_mentioned_p (value, early_op);
18101 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18102 have an early register mult dependency on the result of
18103 PRODUCER. */
18106 arm_no_early_mul_dep (rtx producer, rtx consumer)
18108 rtx value = PATTERN (producer);
18109 rtx op = PATTERN (consumer);
18111 if (GET_CODE (value) == COND_EXEC)
18112 value = COND_EXEC_CODE (value);
18113 if (GET_CODE (value) == PARALLEL)
18114 value = XVECEXP (value, 0, 0);
18115 value = XEXP (value, 0);
18116 if (GET_CODE (op) == COND_EXEC)
18117 op = COND_EXEC_CODE (op);
18118 if (GET_CODE (op) == PARALLEL)
18119 op = XVECEXP (op, 0, 0);
18120 op = XEXP (op, 1);
18122 return (GET_CODE (op) == PLUS
18123 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18126 /* We can't rely on the caller doing the proper promotion when
18127 using APCS or ATPCS. */
18129 static bool
18130 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18132 return !TARGET_AAPCS_BASED;
18136 /* AAPCS based ABIs use short enums by default. */
18138 static bool
18139 arm_default_short_enums (void)
18141 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18145 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18147 static bool
18148 arm_align_anon_bitfield (void)
18150 return TARGET_AAPCS_BASED;
18154 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18156 static tree
18157 arm_cxx_guard_type (void)
18159 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18162 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18163 has an accumulator dependency on the result of the producer (a
18164 multiplication instruction) and no other dependency on that result. */
18166 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18168 rtx mul = PATTERN (producer);
18169 rtx mac = PATTERN (consumer);
18170 rtx mul_result;
18171 rtx mac_op0, mac_op1, mac_acc;
18173 if (GET_CODE (mul) == COND_EXEC)
18174 mul = COND_EXEC_CODE (mul);
18175 if (GET_CODE (mac) == COND_EXEC)
18176 mac = COND_EXEC_CODE (mac);
18178 /* Check that mul is of the form (set (...) (mult ...))
18179 and mla is of the form (set (...) (plus (mult ...) (...))). */
18180 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18181 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18182 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18183 return 0;
18185 mul_result = XEXP (mul, 0);
18186 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18187 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18188 mac_acc = XEXP (XEXP (mac, 1), 1);
18190 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18191 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18192 && !reg_overlap_mentioned_p (mul_result, mac_op1));
18196 /* The EABI says test the least significant bit of a guard variable. */
18198 static bool
18199 arm_cxx_guard_mask_bit (void)
18201 return TARGET_AAPCS_BASED;
18205 /* The EABI specifies that all array cookies are 8 bytes long. */
18207 static tree
18208 arm_get_cookie_size (tree type)
18210 tree size;
18212 if (!TARGET_AAPCS_BASED)
18213 return default_cxx_get_cookie_size (type);
18215 size = build_int_cst (sizetype, 8);
18216 return size;
18220 /* The EABI says that array cookies should also contain the element size. */
18222 static bool
18223 arm_cookie_has_size (void)
18225 return TARGET_AAPCS_BASED;
18229 /* The EABI says constructors and destructors should return a pointer to
18230 the object constructed/destroyed. */
18232 static bool
18233 arm_cxx_cdtor_returns_this (void)
18235 return TARGET_AAPCS_BASED;
18238 /* The EABI says that an inline function may never be the key
18239 method. */
18241 static bool
18242 arm_cxx_key_method_may_be_inline (void)
18244 return !TARGET_AAPCS_BASED;
18247 static void
18248 arm_cxx_determine_class_data_visibility (tree decl)
18250 if (!TARGET_AAPCS_BASED)
18251 return;
18253 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18254 is exported. However, on systems without dynamic vague linkage,
18255 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18256 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18257 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18258 else
18259 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18260 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18263 static bool
18264 arm_cxx_class_data_always_comdat (void)
18266 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18267 vague linkage if the class has no key function. */
18268 return !TARGET_AAPCS_BASED;
18272 /* The EABI says __aeabi_atexit should be used to register static
18273 destructors. */
18275 static bool
18276 arm_cxx_use_aeabi_atexit (void)
18278 return TARGET_AAPCS_BASED;
18282 void
18283 arm_set_return_address (rtx source, rtx scratch)
18285 arm_stack_offsets *offsets;
18286 HOST_WIDE_INT delta;
18287 rtx addr;
18288 unsigned long saved_regs;
18290 offsets = arm_get_frame_offsets ();
18291 saved_regs = offsets->saved_regs_mask;
18293 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18294 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18295 else
18297 if (frame_pointer_needed)
18298 addr = plus_constant(hard_frame_pointer_rtx, -4);
18299 else
18301 /* LR will be the first saved register. */
18302 delta = offsets->outgoing_args - (offsets->frame + 4);
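          /* An offset of 4096 or more will not fit in a load/store
             displacement, so add the upper bits into the scratch register
             first.  */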
18305 if (delta >= 4096)
18307 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18308 GEN_INT (delta & ~4095)));
18309 addr = scratch;
18310 delta &= 4095;
18312 else
18313 addr = stack_pointer_rtx;
18315 addr = plus_constant (addr, delta);
18317 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18322 void
18323 thumb_set_return_address (rtx source, rtx scratch)
18325 arm_stack_offsets *offsets;
18326 HOST_WIDE_INT delta;
18327 HOST_WIDE_INT limit;
18328 int reg;
18329 rtx addr;
18330 unsigned long mask;
18332 emit_use (source);
18334 offsets = arm_get_frame_offsets ();
18335 mask = offsets->saved_regs_mask;
18336 if (mask & (1 << LR_REGNUM))
18338 limit = 1024;
18339 /* Find the saved regs. */
18340 if (frame_pointer_needed)
18342 delta = offsets->soft_frame - offsets->saved_args;
18343 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18344 if (TARGET_THUMB1)
18345 limit = 128;
18347 else
18349 delta = offsets->outgoing_args - offsets->saved_args;
18350 reg = SP_REGNUM;
18352 /* Allow for the stack frame. */
18353 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18354 delta -= 16;
18355 /* The link register is always the first saved register. */
18356 delta -= 4;
18358 /* Construct the address. */
18359 addr = gen_rtx_REG (SImode, reg);
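      /* If the offset will not fit in a Thumb load/store displacement,
         build the address in the scratch register instead.  */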
18360 if (delta > limit)
18362 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18363 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18364 addr = scratch;
18366 else
18367 addr = plus_constant (addr, delta);
18369 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18371 else
18372 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18375 /* Implements target hook vector_mode_supported_p. */
18376 bool
18377 arm_vector_mode_supported_p (enum machine_mode mode)
18379 /* Neon also supports V2SImode, etc. listed in the clause below. */
18380 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18381 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18382 return true;
18384 if ((mode == V2SImode)
18385 || (mode == V4HImode)
18386 || (mode == V8QImode))
18387 return true;
18389 return false;
18392 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18393 ARM insns and therefore guarantee that the shift count is modulo 256.
18394 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18395 guarantee no particular behavior for out-of-range counts. */
18397 static unsigned HOST_WIDE_INT
18398 arm_shift_truncation_mask (enum machine_mode mode)
18400 return mode == SImode ? 255 : 0;
18404 /* Map internal gcc register numbers to DWARF2 register numbers. */
18406 unsigned int
18407 arm_dbx_register_number (unsigned int regno)
18409 if (regno < 16)
18410 return regno;
18412 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18413 compatibility. The EABI defines them as registers 96-103. */
18414 if (IS_FPA_REGNUM (regno))
18415 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18417 /* FIXME: VFPv3 register numbering. */
18418 if (IS_VFP_REGNUM (regno))
18419 return 64 + regno - FIRST_VFP_REGNUM;
18421 if (IS_IWMMXT_GR_REGNUM (regno))
18422 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18424 if (IS_IWMMXT_REGNUM (regno))
18425 return 112 + regno - FIRST_IWMMXT_REGNUM;
18427 gcc_unreachable ();
18431 #ifdef TARGET_UNWIND_INFO
18432 /* Emit unwind directives for a store-multiple instruction or stack pointer
18433 push during alignment.
18434 These should only ever be generated by the function prologue code, so
18435 expect them to have a particular form. */
18437 static void
18438 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18440 int i;
18441 HOST_WIDE_INT offset;
18442 HOST_WIDE_INT nregs;
18443 int reg_size;
18444 unsigned reg;
18445 unsigned lastreg;
18446 rtx e;
18448 e = XVECEXP (p, 0, 0);
18449 if (GET_CODE (e) != SET)
18450 abort ();
18452 /* First insn will adjust the stack pointer. */
18453 if (GET_CODE (e) != SET
18454 || GET_CODE (XEXP (e, 0)) != REG
18455 || REGNO (XEXP (e, 0)) != SP_REGNUM
18456 || GET_CODE (XEXP (e, 1)) != PLUS)
18457 abort ();
18459 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18460 nregs = XVECLEN (p, 0) - 1;
18462 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18463 if (reg < 16)
18465 /* The function prologue may also push pc, but does not annotate it, as it is
18466 never restored. We turn this into a stack pointer adjustment. */
18467 if (nregs * 4 == offset - 4)
18469 fprintf (asm_out_file, "\t.pad #4\n");
18470 offset -= 4;
18472 reg_size = 4;
18473 fprintf (asm_out_file, "\t.save {");
18475 else if (IS_VFP_REGNUM (reg))
18477 reg_size = 8;
18478 fprintf (asm_out_file, "\t.vsave {");
18480 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18482 /* FPA registers are done differently. */
18483 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18484 return;
18486 else
18487 /* Unknown register type. */
18488 abort ();
18490 /* If the stack increment doesn't match the size of the saved registers,
18491 something has gone horribly wrong. */
18492 if (offset != nregs * reg_size)
18493 abort ();
18495 offset = 0;
18496 lastreg = 0;
18497 /* The remaining insns will describe the stores. */
18498 for (i = 1; i <= nregs; i++)
18500 /* Expect (set (mem <addr>) (reg)),
18501 where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18502 e = XVECEXP (p, 0, i);
18503 if (GET_CODE (e) != SET
18504 || GET_CODE (XEXP (e, 0)) != MEM
18505 || GET_CODE (XEXP (e, 1)) != REG)
18506 abort ();
18508 reg = REGNO (XEXP (e, 1));
18509 if (reg < lastreg)
18510 abort ();
18512 if (i != 1)
18513 fprintf (asm_out_file, ", ");
18514 /* We can't use %r for vfp because we need to use the
18515 double precision register names. */
18516 if (IS_VFP_REGNUM (reg))
18517 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18518 else
18519 asm_fprintf (asm_out_file, "%r", reg);
18521 #ifdef ENABLE_CHECKING
18522 /* Check that the addresses are consecutive. */
18523 e = XEXP (XEXP (e, 0), 0);
18524 if (GET_CODE (e) == PLUS)
18526 offset += reg_size;
18527 if (GET_CODE (XEXP (e, 0)) != REG
18528 || REGNO (XEXP (e, 0)) != SP_REGNUM
18529 || GET_CODE (XEXP (e, 1)) != CONST_INT
18530 || offset != INTVAL (XEXP (e, 1)))
18531 abort ();
18533 else if (i != 1
18534 || GET_CODE (e) != REG
18535 || REGNO (e) != SP_REGNUM)
18536 abort ();
18537 #endif
18539 fprintf (asm_out_file, "}\n");
18542 /* Emit unwind directives for a SET. */
18544 static void
18545 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18547 rtx e0;
18548 rtx e1;
18549 unsigned reg;
18551 e0 = XEXP (p, 0);
18552 e1 = XEXP (p, 1);
18553 switch (GET_CODE (e0))
18555 case MEM:
18556 /* Pushing a single register. */
18557 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18558 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18559 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18560 abort ();
18562 asm_fprintf (asm_out_file, "\t.save ");
18563 if (IS_VFP_REGNUM (REGNO (e1)))
18564 asm_fprintf(asm_out_file, "{d%d}\n",
18565 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18566 else
18567 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18568 break;
18570 case REG:
18571 if (REGNO (e0) == SP_REGNUM)
18573 /* A stack increment. */
18574 if (GET_CODE (e1) != PLUS
18575 || GET_CODE (XEXP (e1, 0)) != REG
18576 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18577 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18578 abort ();
18580 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18581 -INTVAL (XEXP (e1, 1)));
18583 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18585 HOST_WIDE_INT offset;
18587 if (GET_CODE (e1) == PLUS)
18589 if (GET_CODE (XEXP (e1, 0)) != REG
18590 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18591 abort ();
18592 reg = REGNO (XEXP (e1, 0));
18593 offset = INTVAL (XEXP (e1, 1));
18594 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18595 HARD_FRAME_POINTER_REGNUM, reg,
18596 INTVAL (XEXP (e1, 1)));
18598 else if (GET_CODE (e1) == REG)
18600 reg = REGNO (e1);
18601 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18602 HARD_FRAME_POINTER_REGNUM, reg);
18604 else
18605 abort ();
18607 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18609 /* Move from sp to reg. */
18610 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18612 else if (GET_CODE (e1) == PLUS
18613 && GET_CODE (XEXP (e1, 0)) == REG
18614 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18615 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18617 /* Set reg to offset from sp. */
18618 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18619 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18621 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18623 /* Stack pointer save before alignment. */
18624 reg = REGNO (e0);
18625 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18626 reg + 0x90, reg);
18628 else
18629 abort ();
18630 break;
18632 default:
18633 abort ();
18638 /* Emit unwind directives for the given insn. */
18640 static void
18641 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18643 rtx pat;
18645 if (!ARM_EABI_UNWIND_TABLES)
18646 return;
18648 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
18649 && (TREE_NOTHROW (current_function_decl)
18650 || crtl->all_throwers_are_sibcalls))
18651 return;
18653 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18654 return;
18656 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18657 if (pat)
18658 pat = XEXP (pat, 0);
18659 else
18660 pat = PATTERN (insn);
18662 switch (GET_CODE (pat))
18664 case SET:
18665 arm_unwind_emit_set (asm_out_file, pat);
18666 break;
18668 case SEQUENCE:
18669 /* Store multiple. */
18670 arm_unwind_emit_sequence (asm_out_file, pat);
18671 break;
18673 default:
18674 abort();
18679 /* Output a reference from a function exception table to the type_info
18680 object X. The EABI specifies that the symbol should be relocated by
18681 an R_ARM_TARGET2 relocation. */
18683 static bool
18684 arm_output_ttype (rtx x)
18686 fputs ("\t.word\t", asm_out_file);
18687 output_addr_const (asm_out_file, x);
18688 /* Use special relocations for symbol references. */
18689 if (GET_CODE (x) != CONST_INT)
18690 fputs ("(TARGET2)", asm_out_file);
18691 fputc ('\n', asm_out_file);
18693 return TRUE;
18695 #endif /* TARGET_UNWIND_INFO */
18698 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18699 stack alignment. */
18701 static void
18702 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18704 rtx unspec = SET_SRC (pattern);
18705 gcc_assert (GET_CODE (unspec) == UNSPEC);
18707 switch (index)
18709 case UNSPEC_STACK_ALIGN:
18710 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18711 put anything on the stack, so hopefully it won't matter.
18712 CFA = SP will be correct after alignment. */
18713 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18714 SET_DEST (pattern));
18715 break;
18716 default:
18717 gcc_unreachable ();
18722 /* Output unwind directives for the start/end of a function. */
18724 void
18725 arm_output_fn_unwind (FILE * f, bool prologue)
18727 if (!ARM_EABI_UNWIND_TABLES)
18728 return;
18730 if (prologue)
18731 fputs ("\t.fnstart\n", f);
18732 else
18734 /* If this function will never be unwound, then mark it as such.
18735 The same condition is used in arm_unwind_emit to suppress
18736 the frame annotations. */
18737 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
18738 && (TREE_NOTHROW (current_function_decl)
18739 || crtl->all_throwers_are_sibcalls))
18740 fputs("\t.cantunwind\n", f);
18742 fputs ("\t.fnend\n", f);
18746 static bool
18747 arm_emit_tls_decoration (FILE *fp, rtx x)
18749 enum tls_reloc reloc;
18750 rtx val;
18752 val = XVECEXP (x, 0, 0);
18753 reloc = INTVAL (XVECEXP (x, 0, 1));
18755 output_addr_const (fp, val);
18757 switch (reloc)
18759 case TLS_GD32:
18760 fputs ("(tlsgd)", fp);
18761 break;
18762 case TLS_LDM32:
18763 fputs ("(tlsldm)", fp);
18764 break;
18765 case TLS_LDO32:
18766 fputs ("(tlsldo)", fp);
18767 break;
18768 case TLS_IE32:
18769 fputs ("(gottpoff)", fp);
18770 break;
18771 case TLS_LE32:
18772 fputs ("(tpoff)", fp);
18773 break;
18774 default:
18775 gcc_unreachable ();
18778 switch (reloc)
18780 case TLS_GD32:
18781 case TLS_LDM32:
18782 case TLS_IE32:
18783 fputs (" + (. - ", fp);
18784 output_addr_const (fp, XVECEXP (x, 0, 2));
18785 fputs (" - ", fp);
18786 output_addr_const (fp, XVECEXP (x, 0, 3));
18787 fputc (')', fp);
18788 break;
18789 default:
18790 break;
18793 return TRUE;
18796 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18798 static void
18799 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18801 gcc_assert (size == 4);
18802 fputs ("\t.word\t", file);
18803 output_addr_const (file, x);
18804 fputs ("(tlsldo)", file);
18807 bool
18808 arm_output_addr_const_extra (FILE *fp, rtx x)
18810 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18811 return arm_emit_tls_decoration (fp, x);
18812 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18814 char label[256];
18815 int labelno = INTVAL (XVECEXP (x, 0, 0));
18817 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18818 assemble_name_raw (fp, label);
18820 return TRUE;
18822 else if (GET_CODE (x) == CONST_VECTOR)
18823 return arm_emit_vector_const (fp, x);
18825 return FALSE;
18828 /* Output assembly for a shift instruction.
18829 SET_FLAGS determines how the instruction modifies the condition codes.
18830 0 - Do not set condition codes.
18831 1 - Set condition codes.
18832 2 - Use smallest instruction. */
18833 const char *
18834 arm_output_shift(rtx * operands, int set_flags)
18836 char pattern[100];
18837 static const char flag_chars[3] = {'?', '.', '!'};
18838 const char *shift;
18839 HOST_WIDE_INT val;
18840 char c;
18842 c = flag_chars[set_flags];
18843 if (TARGET_UNIFIED_ASM)
18845 shift = shift_op(operands[3], &val);
18846 if (shift)
18848 if (val != -1)
18849 operands[2] = GEN_INT(val);
18850 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18852 else
18853 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18855 else
18856 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18857 output_asm_insn (pattern, operands);
18858 return "";
18861 /* Output a Thumb-2 casesi instruction. */
18862 const char *
18863 thumb2_output_casesi (rtx *operands)
18865 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18867 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18869 output_asm_insn ("cmp\t%0, %1", operands);
18870 output_asm_insn ("bhi\t%l3", operands);
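  /* tbb/tbh handle byte and halfword dispatch tables directly; SImode
     tables need the entry loaded and the target address computed
     explicitly (added to the table base when generating PIC).  */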
18871 switch (GET_MODE(diff_vec))
18873 case QImode:
18874 return "tbb\t[%|pc, %0]";
18875 case HImode:
18876 return "tbh\t[%|pc, %0, lsl #1]";
18877 case SImode:
18878 if (flag_pic)
18880 output_asm_insn ("adr\t%4, %l2", operands);
18881 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18882 output_asm_insn ("add\t%4, %4, %5", operands);
18883 return "bx\t%4";
18885 else
18887 output_asm_insn ("adr\t%4, %l2", operands);
18888 return "ldr\t%|pc, [%4, %0, lsl #2]";
18890 default:
18891 gcc_unreachable ();
18895 /* Most ARM cores are single issue, but some newer ones can dual issue.
18896 The scheduler descriptions rely on this being correct. */
18897 static int
18898 arm_issue_rate (void)
18900 switch (arm_tune)
18902 case cortexr4:
18903 case cortexa8:
18904 return 2;
18906 default:
18907 return 1;
18911 /* A table and a function to perform ARM-specific name mangling for
18912 NEON vector types in order to conform to the AAPCS (see "Procedure
18913 Call Standard for the ARM Architecture", Appendix A). To qualify
18914 for emission with the mangled names defined in that document, a
18915 vector type must not only be of the correct mode but also be
18916 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18917 typedef struct
18919 enum machine_mode mode;
18920 const char *element_type_name;
18921 const char *aapcs_name;
18922 } arm_mangle_map_entry;
18924 static arm_mangle_map_entry arm_mangle_map[] = {
18925 /* 64-bit containerized types. */
18926 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18927 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18928 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18929 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18930 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18931 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18932 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18933 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18934 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18935 /* 128-bit containerized types. */
18936 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18937 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18938 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18939 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18940 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18941 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18942 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18943 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18944 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18945 { VOIDmode, NULL, NULL }
18948 const char *
18949 arm_mangle_type (const_tree type)
18951 arm_mangle_map_entry *pos = arm_mangle_map;
18953 if (TREE_CODE (type) != VECTOR_TYPE)
18954 return NULL;
18956 /* Check the mode of the vector type, and the name of the vector
18957 element type, against the table. */
18958 while (pos->mode != VOIDmode)
18960 tree elt_type = TREE_TYPE (type);
18962 if (pos->mode == TYPE_MODE (type)
18963 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18964 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18965 pos->element_type_name))
18966 return pos->aapcs_name;
18968 pos++;
18971 /* Use the default mangling for unrecognized (possibly user-defined)
18972 vector types. */
18973 return NULL;
18976 #include "gt-arm.h"