official-gcc.git / gcc / config / arm / arm.c
blob 82f8352eac2613c1e1da3d6cbc7e182d31c38b8e
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "real.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "recog.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "c-pragma.h"
48 #include "integrate.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "debug.h"
53 #include "langhooks.h"
54 #include "df.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 void (*arm_lang_output_object_attributes_hook)(void);
64 /* Forward function declarations. */
65 static arm_stack_offsets *arm_get_frame_offsets (void);
66 static void arm_add_gc_roots (void);
67 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
68 HOST_WIDE_INT, rtx, rtx, int, int);
69 static unsigned bit_count (unsigned long);
70 static int arm_address_register_rtx_p (rtx, int);
71 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
72 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
73 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
74 inline static int thumb1_index_register_rtx_p (rtx, int);
75 static int thumb_far_jump_used_p (void);
76 static bool thumb_force_lr_save (void);
77 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
78 static rtx emit_sfm (int, int);
79 static unsigned arm_size_return_regs (void);
80 static bool arm_assemble_integer (rtx, unsigned int, int);
81 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
82 static arm_cc get_arm_condition_code (rtx);
83 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
84 static rtx is_jump_table (rtx);
85 static const char *output_multi_immediate (rtx *, const char *, const char *,
86 int, HOST_WIDE_INT);
87 static const char *shift_op (rtx, HOST_WIDE_INT *);
88 static struct machine_function *arm_init_machine_status (void);
89 static void thumb_exit (FILE *, int);
90 static rtx is_jump_table (rtx);
91 static HOST_WIDE_INT get_jump_table_size (rtx);
92 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
93 static Mnode *add_minipool_forward_ref (Mfix *);
94 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
95 static Mnode *add_minipool_backward_ref (Mfix *);
96 static void assign_minipool_offsets (Mfix *);
97 static void arm_print_value (FILE *, rtx);
98 static void dump_minipool (rtx);
99 static int arm_barrier_cost (rtx);
100 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
101 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
102 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
103 rtx);
104 static void arm_reorg (void);
105 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree);
109 static unsigned long arm_compute_func_type (void);
110 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
111 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
114 #endif
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static int arm_comp_type_attributes (const_tree, const_tree);
119 static void arm_set_default_type_attributes (tree);
120 static int arm_adjust_cost (rtx, rtx, rtx, int);
121 static int count_insns_for_constant (HOST_WIDE_INT, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree, tree);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
126 tree);
127 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
128 static bool arm_size_rtx_costs (rtx, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx, int, int, int *);
133 static int arm_address_cost (rtx);
134 static bool arm_memory_load_p (rtx);
135 static bool arm_cirrus_insn_p (rtx);
136 static void cirrus_reorg (rtx);
137 static void arm_init_builtins (void);
138 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx safe_vector_operand (rtx, enum machine_mode);
141 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
142 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
143 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
144 static void emit_constant_insn (rtx cond, rtx pattern);
145 static rtx emit_set_insn (rtx, rtx);
146 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
147 tree, bool);
149 #ifdef OBJECT_FORMAT_ELF
150 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
151 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
152 #endif
153 #ifndef ARM_PE
154 static void arm_encode_section_info (tree, rtx, int);
155 #endif
157 static void arm_file_end (void);
158 static void arm_file_start (void);
160 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
161 tree, int *, int);
162 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
163 enum machine_mode, const_tree, bool);
164 static bool arm_promote_prototypes (const_tree);
165 static bool arm_default_short_enums (void);
166 static bool arm_align_anon_bitfield (void);
167 static bool arm_return_in_msb (const_tree);
168 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
169 #ifdef TARGET_UNWIND_INFO
170 static void arm_unwind_emit (FILE *, rtx);
171 static bool arm_output_ttype (rtx);
172 #endif
173 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
175 static tree arm_cxx_guard_type (void);
176 static bool arm_cxx_guard_mask_bit (void);
177 static tree arm_get_cookie_size (tree);
178 static bool arm_cookie_has_size (void);
179 static bool arm_cxx_cdtor_returns_this (void);
180 static bool arm_cxx_key_method_may_be_inline (void);
181 static void arm_cxx_determine_class_data_visibility (tree);
182 static bool arm_cxx_class_data_always_comdat (void);
183 static bool arm_cxx_use_aeabi_atexit (void);
184 static void arm_init_libfuncs (void);
185 static bool arm_handle_option (size_t, const char *, int);
186 static void arm_target_help (void);
187 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
188 static bool arm_cannot_copy_insn_p (rtx);
189 static bool arm_tls_symbol_p (rtx x);
190 static int arm_issue_rate (void);
191 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
194 /* Initialize the GCC target structure. */
195 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
196 #undef TARGET_MERGE_DECL_ATTRIBUTES
197 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
198 #endif
200 #undef TARGET_ATTRIBUTE_TABLE
201 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
203 #undef TARGET_ASM_FILE_START
204 #define TARGET_ASM_FILE_START arm_file_start
205 #undef TARGET_ASM_FILE_END
206 #define TARGET_ASM_FILE_END arm_file_end
208 #undef TARGET_ASM_ALIGNED_SI_OP
209 #define TARGET_ASM_ALIGNED_SI_OP NULL
210 #undef TARGET_ASM_INTEGER
211 #define TARGET_ASM_INTEGER arm_assemble_integer
213 #undef TARGET_ASM_FUNCTION_PROLOGUE
214 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
216 #undef TARGET_ASM_FUNCTION_EPILOGUE
217 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
219 #undef TARGET_DEFAULT_TARGET_FLAGS
220 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
221 #undef TARGET_HANDLE_OPTION
222 #define TARGET_HANDLE_OPTION arm_handle_option
223 #undef TARGET_HELP
224 #define TARGET_HELP arm_target_help
226 #undef TARGET_COMP_TYPE_ATTRIBUTES
227 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
229 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
230 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
232 #undef TARGET_SCHED_ADJUST_COST
233 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
235 #undef TARGET_ENCODE_SECTION_INFO
236 #ifdef ARM_PE
237 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
238 #else
239 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
240 #endif
242 #undef TARGET_STRIP_NAME_ENCODING
243 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
245 #undef TARGET_ASM_INTERNAL_LABEL
246 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
248 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
249 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
251 #undef TARGET_ASM_OUTPUT_MI_THUNK
252 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
253 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 254 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall_offset_p
256 /* This will be overridden in arm_override_options. */
257 #undef TARGET_RTX_COSTS
258 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
259 #undef TARGET_ADDRESS_COST
260 #define TARGET_ADDRESS_COST arm_address_cost
262 #undef TARGET_SHIFT_TRUNCATION_MASK
263 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
264 #undef TARGET_VECTOR_MODE_SUPPORTED_P
265 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
267 #undef TARGET_MACHINE_DEPENDENT_REORG
268 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
270 #undef TARGET_INIT_BUILTINS
271 #define TARGET_INIT_BUILTINS arm_init_builtins
272 #undef TARGET_EXPAND_BUILTIN
273 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
275 #undef TARGET_INIT_LIBFUNCS
276 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
278 #undef TARGET_PROMOTE_FUNCTION_ARGS
279 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
280 #undef TARGET_PROMOTE_FUNCTION_RETURN
281 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
282 #undef TARGET_PROMOTE_PROTOTYPES
283 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
284 #undef TARGET_PASS_BY_REFERENCE
285 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
286 #undef TARGET_ARG_PARTIAL_BYTES
287 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
289 #undef TARGET_SETUP_INCOMING_VARARGS
290 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
292 #undef TARGET_DEFAULT_SHORT_ENUMS
293 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
295 #undef TARGET_ALIGN_ANON_BITFIELD
296 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
298 #undef TARGET_NARROW_VOLATILE_BITFIELD
299 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
301 #undef TARGET_CXX_GUARD_TYPE
302 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
304 #undef TARGET_CXX_GUARD_MASK_BIT
305 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
307 #undef TARGET_CXX_GET_COOKIE_SIZE
308 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
310 #undef TARGET_CXX_COOKIE_HAS_SIZE
311 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
313 #undef TARGET_CXX_CDTOR_RETURNS_THIS
314 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
316 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
317 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
319 #undef TARGET_CXX_USE_AEABI_ATEXIT
320 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
322 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
323 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
324 arm_cxx_determine_class_data_visibility
326 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
327 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
329 #undef TARGET_RETURN_IN_MSB
330 #define TARGET_RETURN_IN_MSB arm_return_in_msb
332 #undef TARGET_MUST_PASS_IN_STACK
333 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
335 #ifdef TARGET_UNWIND_INFO
336 #undef TARGET_UNWIND_EMIT
337 #define TARGET_UNWIND_EMIT arm_unwind_emit
339 /* EABI unwinding tables use a different format for the typeinfo tables. */
340 #undef TARGET_ASM_TTYPE
341 #define TARGET_ASM_TTYPE arm_output_ttype
343 #undef TARGET_ARM_EABI_UNWINDER
344 #define TARGET_ARM_EABI_UNWINDER true
345 #endif /* TARGET_UNWIND_INFO */
347 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
348 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
350 #undef TARGET_CANNOT_COPY_INSN_P
351 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
353 #ifdef HAVE_AS_TLS
354 #undef TARGET_HAVE_TLS
355 #define TARGET_HAVE_TLS true
356 #endif
358 #undef TARGET_CANNOT_FORCE_CONST_MEM
359 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
361 #undef TARGET_SCHED_ISSUE_RATE
362 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
364 #undef TARGET_MANGLE_TYPE
365 #define TARGET_MANGLE_TYPE arm_mangle_type
367 #ifdef HAVE_AS_TLS
368 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
369 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
370 #endif
372 struct gcc_target targetm = TARGET_INITIALIZER;
374 /* Obstack for minipool constant handling. */
375 static struct obstack minipool_obstack;
376 static char * minipool_startobj;
378 /* The maximum number of insns skipped which
379 will be conditionalised if possible. */
380 static int max_insns_skipped = 5;
382 extern FILE * asm_out_file;
384 /* True if we are currently building a constant table. */
385 int making_const_table;
387 /* Define the information needed to generate branch insns. This is
388 stored from the compare operation. */
389 rtx arm_compare_op0, arm_compare_op1;
391 /* The processor for which instructions should be scheduled. */
392 enum processor_type arm_tune = arm_none;
394 /* The default processor used if not overridden by commandline. */
395 static enum processor_type arm_default_cpu = arm_none;
397 /* Which floating point model to use. */
398 enum arm_fp_model arm_fp_model;
400 /* Which floating point hardware is available. */
401 enum fputype arm_fpu_arch;
403 /* Which floating point hardware to schedule for. */
404 enum fputype arm_fpu_tune;
406 /* Whether to use floating point hardware. */
407 enum float_abi_type arm_float_abi;
409 /* Which ABI to use. */
410 enum arm_abi_type arm_abi;
412 /* Which thread pointer model to use. */
413 enum arm_tp_type target_thread_pointer = TP_AUTO;
415 /* Used to parse -mstructure_size_boundary command line option. */
416 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
418 /* Used for Thumb call_via trampolines. */
419 rtx thumb_call_via_label[14];
420 static int thumb_call_reg_needed;
422 /* Bit values used to identify processor capabilities. */
423 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
424 #define FL_ARCH3M (1 << 1) /* Extended multiply */
425 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
426 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
427 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
428 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
429 #define FL_THUMB (1 << 6) /* Thumb aware */
430 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
431 #define FL_STRONG (1 << 8) /* StrongARM */
432 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
433 #define FL_XSCALE (1 << 10) /* XScale */
434 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
435 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
436 media instructions. */
437 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
438 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
439 Note: ARM6 & 7 derivatives only. */
440 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
441 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
442 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
443 profile. */
444 #define FL_DIV (1 << 18) /* Hardware divide. */
445 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
446 #define FL_NEON (1 << 20) /* Neon instructions. */
448 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
450 #define FL_FOR_ARCH2 FL_NOTM
451 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
452 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
453 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
454 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
455 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
456 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
457 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
458 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
459 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
460 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
461 #define FL_FOR_ARCH6J FL_FOR_ARCH6
462 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
463 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
464 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
465 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
466 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
467 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
468 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
469 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
470 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
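/* Illustrative note: because the FL_FOR_ARCH* masks compose by
   inclusion, a capability test is a plain mask test against
   insn_flags, for example

     arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;

   which is true for -march=armv6t2 and all of the armv7 variants,
   since each of their FL_FOR_ARCH* values includes FL_THUMB2.  This
   is exactly the form used in arm_override_options below.  */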
472 /* The bits in this mask specify which
473 instructions we are allowed to generate. */
474 static unsigned long insn_flags = 0;
476 /* The bits in this mask specify which instruction scheduling options should
477 be used. */
478 static unsigned long tune_flags = 0;
480 /* The following are used in the arm.md file as equivalents to bits
481 in the above two flag variables. */
483 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
484 int arm_arch3m = 0;
486 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
487 int arm_arch4 = 0;
489 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
490 int arm_arch4t = 0;
492 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
493 int arm_arch5 = 0;
495 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
496 int arm_arch5e = 0;
498 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
499 int arm_arch6 = 0;
501 /* Nonzero if this chip supports the ARM 6K extensions. */
502 int arm_arch6k = 0;
504 /* Nonzero if instructions not present in the 'M' profile can be used. */
505 int arm_arch_notm = 0;
507 /* Nonzero if this chip can benefit from load scheduling. */
508 int arm_ld_sched = 0;
510 /* Nonzero if this chip is a StrongARM. */
511 int arm_tune_strongarm = 0;
513 /* Nonzero if this chip is a Cirrus variant. */
514 int arm_arch_cirrus = 0;
516 /* Nonzero if this chip supports Intel Wireless MMX technology. */
517 int arm_arch_iwmmxt = 0;
519 /* Nonzero if this chip is an XScale. */
520 int arm_arch_xscale = 0;
522 /* Nonzero if tuning for XScale */
523 int arm_tune_xscale = 0;
525 /* Nonzero if we want to tune for stores that access the write-buffer.
526 This typically means an ARM6 or ARM7 with MMU or MPU. */
527 int arm_tune_wbuf = 0;
529 /* Nonzero if generating Thumb instructions. */
530 int thumb_code = 0;
532 /* Nonzero if we should define __THUMB_INTERWORK__ in the
533 preprocessor.
534 XXX This is a bit of a hack, it's intended to help work around
535 problems in GLD which doesn't understand that armv5t code is
536 interworking clean. */
537 int arm_cpp_interwork = 0;
539 /* Nonzero if chip supports Thumb 2. */
540 int arm_arch_thumb2;
542 /* Nonzero if chip supports integer division instruction. */
543 int arm_arch_hwdiv;
545 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
546 must report the mode of the memory reference from PRINT_OPERAND to
547 PRINT_OPERAND_ADDRESS. */
548 enum machine_mode output_memory_reference_mode;
550 /* The register number to be used for the PIC offset register. */
551 unsigned arm_pic_register = INVALID_REGNUM;
553 /* Set to 1 when a return insn is output, this means that the epilogue
554 is not needed. */
555 int return_used_this_function;
 557 /* Set to 1 after arm_reorg has started.  Reset at the start of
 558 the next function. */
559 static int after_arm_reorg = 0;
561 /* The maximum number of insns to be used when loading a constant. */
562 static int arm_constant_limit = 3;
564 /* For an explanation of these variables, see final_prescan_insn below. */
565 int arm_ccfsm_state;
566 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
567 enum arm_cond_code arm_current_cc;
568 rtx arm_target_insn;
569 int arm_target_label;
570 /* The number of conditionally executed insns, including the current insn. */
571 int arm_condexec_count = 0;
572 /* A bitmask specifying the patterns for the IT block.
573 Zero means do not output an IT block before this insn. */
574 int arm_condexec_mask = 0;
575 /* The number of bits used in arm_condexec_mask. */
576 int arm_condexec_masklen = 0;
578 /* The condition codes of the ARM, and the inverse function. */
579 static const char * const arm_condition_codes[] =
581 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
582 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
585 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
586 #define streq(string1, string2) (strcmp (string1, string2) == 0)
588 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
589 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
590 | (1 << PIC_OFFSET_TABLE_REGNUM)))
592 /* Initialization code. */
594 struct processors
596 const char *const name;
597 enum processor_type core;
598 const char *arch;
599 const unsigned long flags;
600 bool (* rtx_costs) (rtx, int, int, int *);
603 /* Not all of these give usefully different compilation alternatives,
604 but there is no simple way of generalizing them. */
605 static const struct processors all_cores[] =
607 /* ARM Cores */
608 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
609 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
610 #include "arm-cores.def"
611 #undef ARM_CORE
612 {NULL, arm_none, NULL, 0, NULL}
615 static const struct processors all_architectures[] =
617 /* ARM Architectures */
618 /* We don't specify rtx_costs here as it will be figured out
619 from the core. */
621 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
622 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
623 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
624 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
625 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
626 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
627 implementations that support it, so we will leave it out for now. */
628 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
629 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
630 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
631 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
632 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
633 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
634 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
635 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
636 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
637 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
638 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
639 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
640 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
641 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
642 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
643 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
644 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
645 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
646 {NULL, arm_none, NULL, 0 , NULL}
649 struct arm_cpu_select
651 const char * string;
652 const char * name;
653 const struct processors * processors;
656 /* This is a magic structure. The 'string' field is magically filled in
657 with a pointer to the value specified by the user on the command line
658 assuming that the user has specified such a value. */
660 static struct arm_cpu_select arm_select[] =
662 /* string name processors */
663 { NULL, "-mcpu=", all_cores },
664 { NULL, "-march=", all_architectures },
665 { NULL, "-mtune=", all_cores }
668 /* Defines representing the indexes into the above table. */
669 #define ARM_OPT_SET_CPU 0
670 #define ARM_OPT_SET_ARCH 1
671 #define ARM_OPT_SET_TUNE 2
673 /* The name of the preprocessor macro to define for this architecture. */
675 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
677 struct fpu_desc
679 const char * name;
680 enum fputype fpu;
684 /* Available values for -mfpu=. */
686 static const struct fpu_desc all_fpus[] =
688 {"fpa", FPUTYPE_FPA},
689 {"fpe2", FPUTYPE_FPA_EMU2},
 690 {"fpe3", FPUTYPE_FPA_EMU3},
691 {"maverick", FPUTYPE_MAVERICK},
692 {"vfp", FPUTYPE_VFP},
693 {"vfp3", FPUTYPE_VFP3},
694 {"neon", FPUTYPE_NEON}
698 /* Floating point models used by the different hardware.
699 See fputype in arm.h. */
701 static const enum fputype fp_model_for_fpu[] =
703 /* No FP hardware. */
704 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
705 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
706 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
707 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
708 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
709 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
710 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
711 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
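/* Worked example of how the two tables above interact: -mfpu=neon
   selects FPUTYPE_NEON from all_fpus, and arm_override_options then
   sets arm_fp_model = fp_model_for_fpu[FPUTYPE_NEON], which is
   ARM_FP_MODEL_VFP, since NEON shares the VFP register file and
   calling conventions.  */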
715 struct float_abi
717 const char * name;
718 enum float_abi_type abi_type;
722 /* Available values for -mfloat-abi=. */
724 static const struct float_abi all_float_abis[] =
726 {"soft", ARM_FLOAT_ABI_SOFT},
727 {"softfp", ARM_FLOAT_ABI_SOFTFP},
728 {"hard", ARM_FLOAT_ABI_HARD}
732 struct abi_name
734 const char *name;
735 enum arm_abi_type abi_type;
739 /* Available values for -mabi=. */
741 static const struct abi_name arm_all_abis[] =
743 {"apcs-gnu", ARM_ABI_APCS},
744 {"atpcs", ARM_ABI_ATPCS},
745 {"aapcs", ARM_ABI_AAPCS},
746 {"iwmmxt", ARM_ABI_IWMMXT},
747 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
750 /* Supported TLS relocations. */
752 enum tls_reloc {
753 TLS_GD32,
754 TLS_LDM32,
755 TLS_LDO32,
756 TLS_IE32,
757 TLS_LE32
760 /* Emit an insn that's a simple single-set. Both the operands must be known
761 to be valid. */
762 inline static rtx
763 emit_set_insn (rtx x, rtx y)
765 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
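/* Usage sketch: emit_set_insn (stack_pointer_rtx, hard_frame_pointer_rtx)
   emits the single insn (set (reg sp) (reg fp)).  No legitimization or
   reloading is done here, hence the requirement that both operands
   already be valid.  */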
768 /* Return the number of bits set in VALUE. */
769 static unsigned
770 bit_count (unsigned long value)
772 unsigned long count = 0;
774 while (value)
776 count++;
777 value &= value - 1; /* Clear the least-significant set bit. */
780 return count;
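/* Worked example: bit_count (0x29) clears the lowest set bit on each
   iteration (0x29 -> 0x28 -> 0x20 -> 0) and returns 3; the loop runs
   once per set bit rather than once per bit position.  */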
783 /* Set up library functions unique to ARM. */
785 static void
786 arm_init_libfuncs (void)
788 /* There are no special library functions unless we are using the
789 ARM BPABI. */
790 if (!TARGET_BPABI)
791 return;
793 /* The functions below are described in Section 4 of the "Run-Time
794 ABI for the ARM architecture", Version 1.0. */
796 /* Double-precision floating-point arithmetic. Table 2. */
797 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
798 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
799 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
800 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
801 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
803 /* Double-precision comparisons. Table 3. */
804 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
805 set_optab_libfunc (ne_optab, DFmode, NULL);
806 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
807 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
808 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
809 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
810 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
812 /* Single-precision floating-point arithmetic. Table 4. */
813 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
814 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
815 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
816 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
817 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
819 /* Single-precision comparisons. Table 5. */
820 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
821 set_optab_libfunc (ne_optab, SFmode, NULL);
822 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
823 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
824 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
825 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
826 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
828 /* Floating-point to integer conversions. Table 6. */
829 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
830 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
831 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
832 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
833 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
834 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
835 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
836 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
838 /* Conversions between floating types. Table 7. */
839 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
840 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
842 /* Integer to floating-point conversions. Table 8. */
843 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
844 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
845 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
846 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
847 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
848 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
849 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
850 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
852 /* Long long. Table 9. */
853 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
854 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
855 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
856 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
857 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
858 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
859 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
860 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
862 /* Integer (32/32->32) division. \S 4.3.1. */
863 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
864 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
866 /* The divmod functions are designed so that they can be used for
867 plain division, even though they return both the quotient and the
868 remainder. The quotient is returned in the usual location (i.e.,
869 r0 for SImode, {r0, r1} for DImode), just as would be expected
870 for an ordinary division routine. Because the AAPCS calling
871 conventions specify that all of { r0, r1, r2, r3 } are
 872 call-clobbered registers, there is no need to tell the compiler
873 explicitly that those registers are clobbered by these
874 routines. */
875 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
876 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
878 /* For SImode division the ABI provides div-without-mod routines,
879 which are faster. */
880 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
881 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
883 /* We don't have mod libcalls. Fortunately gcc knows how to use the
884 divmod libcalls instead. */
885 set_optab_libfunc (smod_optab, DImode, NULL);
886 set_optab_libfunc (umod_optab, DImode, NULL);
887 set_optab_libfunc (smod_optab, SImode, NULL);
888 set_optab_libfunc (umod_optab, SImode, NULL);
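/* Illustrative effect of the registrations above (BPABI targets only):
   for a source expression such as

     long long q = a / b;

   GCC now emits a call to __aeabi_ldivmod and uses only the quotient
   returned in {r0, r1}; the remainder, returned in {r2, r3}, is simply
   ignored when the expression needs no modulus.  */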
891 /* Implement TARGET_HANDLE_OPTION. */
893 static bool
894 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
896 switch (code)
898 case OPT_march_:
899 arm_select[1].string = arg;
900 return true;
902 case OPT_mcpu_:
903 arm_select[0].string = arg;
904 return true;
906 case OPT_mhard_float:
907 target_float_abi_name = "hard";
908 return true;
910 case OPT_msoft_float:
911 target_float_abi_name = "soft";
912 return true;
914 case OPT_mtune_:
915 arm_select[2].string = arg;
916 return true;
918 default:
919 return true;
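/* Example: "-mcpu=xscale -mtune=arm926ej-s" records "xscale" in
   arm_select[ARM_OPT_SET_CPU].string and "arm926ej-s" in
   arm_select[ARM_OPT_SET_TUNE].string; arm_override_options below then
   walks that table to derive insn_flags and arm_tune from the
   recorded names.  */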
923 static void
924 arm_target_help (void)
926 int i;
927 static int columns = 0;
928 int remaining;
930 /* If we have not done so already, obtain the desired maximum width of
931 the output. Note - this is a duplication of the code at the start of
932 gcc/opts.c:print_specific_help() - the two copies should probably be
933 replaced by a single function. */
934 if (columns == 0)
936 const char *p;
938 GET_ENVIRONMENT (p, "COLUMNS");
939 if (p != NULL)
941 int value = atoi (p);
943 if (value > 0)
944 columns = value;
947 if (columns == 0)
948 /* Use a reasonable default. */
949 columns = 80;
952 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
954 /* The - 2 is because we know that the last entry in the array is NULL. */
955 i = ARRAY_SIZE (all_cores) - 2;
956 gcc_assert (i > 0);
957 printf (" %s", all_cores[i].name);
958 remaining = columns - (strlen (all_cores[i].name) + 4);
959 gcc_assert (remaining >= 0);
961 while (i--)
963 int len = strlen (all_cores[i].name);
965 if (remaining > len + 2)
967 printf (", %s", all_cores[i].name);
968 remaining -= len + 2;
970 else
972 if (remaining > 0)
973 printf (",");
974 printf ("\n %s", all_cores[i].name);
975 remaining = columns - (len + 4);
979 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
981 i = ARRAY_SIZE (all_architectures) - 2;
982 gcc_assert (i > 0);
984 printf (" %s", all_architectures[i].name);
985 remaining = columns - (strlen (all_architectures[i].name) + 4);
986 gcc_assert (remaining >= 0);
988 while (i--)
990 int len = strlen (all_architectures[i].name);
992 if (remaining > len + 2)
994 printf (", %s", all_architectures[i].name);
995 remaining -= len + 2;
997 else
999 if (remaining > 0)
1000 printf (",");
1001 printf ("\n %s", all_architectures[i].name);
1002 remaining = columns - (len + 4);
1005 printf ("\n");
1009 /* Fix up any incompatible options that the user has specified.
1010 This has now turned into a maze. */
1011 void
1012 arm_override_options (void)
1014 unsigned i;
1015 enum processor_type target_arch_cpu = arm_none;
1017 /* Set up the flags based on the cpu/architecture selected by the user. */
1018 for (i = ARRAY_SIZE (arm_select); i--;)
1020 struct arm_cpu_select * ptr = arm_select + i;
1022 if (ptr->string != NULL && ptr->string[0] != '\0')
1024 const struct processors * sel;
1026 for (sel = ptr->processors; sel->name != NULL; sel++)
1027 if (streq (ptr->string, sel->name))
1029 /* Set the architecture define. */
1030 if (i != ARM_OPT_SET_TUNE)
1031 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1033 /* Determine the processor core for which we should
1034 tune code-generation. */
1035 if (/* -mcpu= is a sensible default. */
1036 i == ARM_OPT_SET_CPU
1037 /* -mtune= overrides -mcpu= and -march=. */
1038 || i == ARM_OPT_SET_TUNE)
1039 arm_tune = (enum processor_type) (sel - ptr->processors);
1041 /* Remember the CPU associated with this architecture.
1042 If no other option is used to set the CPU type,
1043 we'll use this to guess the most suitable tuning
1044 options. */
1045 if (i == ARM_OPT_SET_ARCH)
1046 target_arch_cpu = sel->core;
1048 if (i != ARM_OPT_SET_TUNE)
1050 /* If we have been given an architecture and a processor
1051 make sure that they are compatible. We only generate
1052 a warning though, and we prefer the CPU over the
1053 architecture. */
1054 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1055 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1056 ptr->string);
1058 insn_flags = sel->flags;
1061 break;
1064 if (sel->name == NULL)
1065 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1069 /* Guess the tuning options from the architecture if necessary. */
1070 if (arm_tune == arm_none)
1071 arm_tune = target_arch_cpu;
1073 /* If the user did not specify a processor, choose one for them. */
1074 if (insn_flags == 0)
1076 const struct processors * sel;
1077 unsigned int sought;
1078 enum processor_type cpu;
1080 cpu = TARGET_CPU_DEFAULT;
1081 if (cpu == arm_none)
1083 #ifdef SUBTARGET_CPU_DEFAULT
1084 /* Use the subtarget default CPU if none was specified by
1085 configure. */
1086 cpu = SUBTARGET_CPU_DEFAULT;
1087 #endif
1088 /* Default to ARM6. */
1089 if (cpu == arm_none)
1090 cpu = arm6;
1092 sel = &all_cores[cpu];
1094 insn_flags = sel->flags;
1096 /* Now check to see if the user has specified some command line
 1097 switch that requires certain abilities from the cpu. */
1098 sought = 0;
1100 if (TARGET_INTERWORK || TARGET_THUMB)
1102 sought |= (FL_THUMB | FL_MODE32);
1104 /* There are no ARM processors that support both APCS-26 and
1105 interworking. Therefore we force FL_MODE26 to be removed
1106 from insn_flags here (if it was set), so that the search
1107 below will always be able to find a compatible processor. */
1108 insn_flags &= ~FL_MODE26;
1111 if (sought != 0 && ((sought & insn_flags) != sought))
1113 /* Try to locate a CPU type that supports all of the abilities
1114 of the default CPU, plus the extra abilities requested by
1115 the user. */
1116 for (sel = all_cores; sel->name != NULL; sel++)
1117 if ((sel->flags & sought) == (sought | insn_flags))
1118 break;
1120 if (sel->name == NULL)
1122 unsigned current_bit_count = 0;
1123 const struct processors * best_fit = NULL;
1125 /* Ideally we would like to issue an error message here
1126 saying that it was not possible to find a CPU compatible
1127 with the default CPU, but which also supports the command
1128 line options specified by the programmer, and so they
1129 ought to use the -mcpu=<name> command line option to
1130 override the default CPU type.
1132 If we cannot find a cpu that has both the
1133 characteristics of the default cpu and the given
1134 command line options we scan the array again looking
1135 for a best match. */
1136 for (sel = all_cores; sel->name != NULL; sel++)
1137 if ((sel->flags & sought) == sought)
1139 unsigned count;
1141 count = bit_count (sel->flags & insn_flags);
1143 if (count >= current_bit_count)
1145 best_fit = sel;
1146 current_bit_count = count;
1150 gcc_assert (best_fit);
1151 sel = best_fit;
1154 insn_flags = sel->flags;
1156 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1157 arm_default_cpu = (enum processor_type) (sel - all_cores);
1158 if (arm_tune == arm_none)
1159 arm_tune = arm_default_cpu;
1162 /* The processor for which we should tune should now have been
1163 chosen. */
1164 gcc_assert (arm_tune != arm_none);
1166 tune_flags = all_cores[(int)arm_tune].flags;
1167 if (optimize_size)
1168 targetm.rtx_costs = arm_size_rtx_costs;
1169 else
1170 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1172 /* Make sure that the processor choice does not conflict with any of the
1173 other command line choices. */
1174 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1175 error ("target CPU does not support ARM mode");
1177 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1179 warning (0, "target CPU does not support interworking" );
1180 target_flags &= ~MASK_INTERWORK;
1183 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1185 warning (0, "target CPU does not support THUMB instructions");
1186 target_flags &= ~MASK_THUMB;
1189 if (TARGET_APCS_FRAME && TARGET_THUMB)
1191 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1192 target_flags &= ~MASK_APCS_FRAME;
1195 /* Callee super interworking implies thumb interworking. Adding
1196 this to the flags here simplifies the logic elsewhere. */
1197 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1198 target_flags |= MASK_INTERWORK;
1200 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1201 from here where no function is being compiled currently. */
1202 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1203 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1205 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1206 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1208 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1209 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1211 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1213 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1214 target_flags |= MASK_APCS_FRAME;
1217 if (TARGET_POKE_FUNCTION_NAME)
1218 target_flags |= MASK_APCS_FRAME;
1220 if (TARGET_APCS_REENT && flag_pic)
1221 error ("-fpic and -mapcs-reent are incompatible");
1223 if (TARGET_APCS_REENT)
1224 warning (0, "APCS reentrant code not supported. Ignored");
1226 /* If this target is normally configured to use APCS frames, warn if they
1227 are turned off and debugging is turned on. */
1228 if (TARGET_ARM
1229 && write_symbols != NO_DEBUG
1230 && !TARGET_APCS_FRAME
1231 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1232 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1234 if (TARGET_APCS_FLOAT)
1235 warning (0, "passing floating point arguments in fp regs not yet supported");
1237 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1238 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1239 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1240 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1241 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1242 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1243 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1244 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1245 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1246 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1247 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1248 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1250 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1251 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1252 thumb_code = (TARGET_ARM == 0);
1253 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1254 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1255 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1256 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1258 /* V5 code we generate is completely interworking capable, so we turn off
1259 TARGET_INTERWORK here to avoid many tests later on. */
1261 /* XXX However, we must pass the right pre-processor defines to CPP
1262 or GLD can get confused. This is a hack. */
1263 if (TARGET_INTERWORK)
1264 arm_cpp_interwork = 1;
1266 if (arm_arch5)
1267 target_flags &= ~MASK_INTERWORK;
1269 if (target_abi_name)
1271 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1273 if (streq (arm_all_abis[i].name, target_abi_name))
1275 arm_abi = arm_all_abis[i].abi_type;
1276 break;
1279 if (i == ARRAY_SIZE (arm_all_abis))
1280 error ("invalid ABI option: -mabi=%s", target_abi_name);
1282 else
1283 arm_abi = ARM_DEFAULT_ABI;
1285 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1286 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1288 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1289 error ("iwmmxt abi requires an iwmmxt capable cpu");
1291 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1292 if (target_fpu_name == NULL && target_fpe_name != NULL)
1294 if (streq (target_fpe_name, "2"))
1295 target_fpu_name = "fpe2";
1296 else if (streq (target_fpe_name, "3"))
1297 target_fpu_name = "fpe3";
1298 else
1299 error ("invalid floating point emulation option: -mfpe=%s",
1300 target_fpe_name);
1302 if (target_fpu_name != NULL)
1304 /* The user specified a FPU. */
1305 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1307 if (streq (all_fpus[i].name, target_fpu_name))
1309 arm_fpu_arch = all_fpus[i].fpu;
1310 arm_fpu_tune = arm_fpu_arch;
1311 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1312 break;
1315 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1316 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1318 else
1320 #ifdef FPUTYPE_DEFAULT
1321 /* Use the default if it is specified for this platform. */
1322 arm_fpu_arch = FPUTYPE_DEFAULT;
1323 arm_fpu_tune = FPUTYPE_DEFAULT;
1324 #else
1325 /* Pick one based on CPU type. */
1326 /* ??? Some targets assume FPA is the default.
1327 if ((insn_flags & FL_VFP) != 0)
1328 arm_fpu_arch = FPUTYPE_VFP;
1329 else
1331 if (arm_arch_cirrus)
1332 arm_fpu_arch = FPUTYPE_MAVERICK;
1333 else
1334 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1335 #endif
1336 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1337 arm_fpu_tune = FPUTYPE_FPA;
1338 else
1339 arm_fpu_tune = arm_fpu_arch;
1340 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1341 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1344 if (target_float_abi_name != NULL)
1346 /* The user specified a FP ABI. */
1347 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1349 if (streq (all_float_abis[i].name, target_float_abi_name))
1351 arm_float_abi = all_float_abis[i].abi_type;
1352 break;
1355 if (i == ARRAY_SIZE (all_float_abis))
1356 error ("invalid floating point abi: -mfloat-abi=%s",
1357 target_float_abi_name);
1359 else
1360 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1362 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1363 sorry ("-mfloat-abi=hard and VFP");
1365 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1366 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1367 will ever exist. GCC makes no attempt to support this combination. */
1368 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1369 sorry ("iWMMXt and hardware floating point");
1371 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1372 if (TARGET_THUMB2 && TARGET_IWMMXT)
1373 sorry ("Thumb-2 iWMMXt");
1375 /* If soft-float is specified then don't use FPU. */
1376 if (TARGET_SOFT_FLOAT)
1377 arm_fpu_arch = FPUTYPE_NONE;
1379 /* For arm2/3 there is no need to do any scheduling if there is only
1380 a floating point emulator, or we are doing software floating-point. */
1381 if ((TARGET_SOFT_FLOAT
1382 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1383 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1384 && (tune_flags & FL_MODE32) == 0)
1385 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1387 if (target_thread_switch)
1389 if (strcmp (target_thread_switch, "soft") == 0)
1390 target_thread_pointer = TP_SOFT;
1391 else if (strcmp (target_thread_switch, "auto") == 0)
1392 target_thread_pointer = TP_AUTO;
1393 else if (strcmp (target_thread_switch, "cp15") == 0)
1394 target_thread_pointer = TP_CP15;
1395 else
1396 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1399 /* Use the cp15 method if it is available. */
1400 if (target_thread_pointer == TP_AUTO)
1402 if (arm_arch6k && !TARGET_THUMB)
1403 target_thread_pointer = TP_CP15;
1404 else
1405 target_thread_pointer = TP_SOFT;
1408 if (TARGET_HARD_TP && TARGET_THUMB1)
1409 error ("can not use -mtp=cp15 with 16-bit Thumb");
1411 /* Override the default structure alignment for AAPCS ABI. */
1412 if (TARGET_AAPCS_BASED)
1413 arm_structure_size_boundary = 8;
1415 if (structure_size_string != NULL)
1417 int size = strtol (structure_size_string, NULL, 0);
1419 if (size == 8 || size == 32
1420 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1421 arm_structure_size_boundary = size;
1422 else
1423 warning (0, "structure size boundary can only be set to %s",
1424 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1427 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1429 error ("RTP PIC is incompatible with Thumb");
1430 flag_pic = 0;
1433 /* If stack checking is disabled, we can use r10 as the PIC register,
1434 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1435 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1437 if (TARGET_VXWORKS_RTP)
1438 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1439 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1442 if (flag_pic && TARGET_VXWORKS_RTP)
1443 arm_pic_register = 9;
1445 if (arm_pic_register_string != NULL)
1447 int pic_register = decode_reg_name (arm_pic_register_string);
1449 if (!flag_pic)
1450 warning (0, "-mpic-register= is useless without -fpic");
1452 /* Prevent the user from choosing an obviously stupid PIC register. */
1453 else if (pic_register < 0 || call_used_regs[pic_register]
1454 || pic_register == HARD_FRAME_POINTER_REGNUM
1455 || pic_register == STACK_POINTER_REGNUM
1456 || pic_register >= PC_REGNUM
1457 || (TARGET_VXWORKS_RTP
1458 && (unsigned int) pic_register != arm_pic_register))
1459 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1460 else
1461 arm_pic_register = pic_register;
1464 /* ??? We might want scheduling for thumb2. */
1465 if (TARGET_THUMB && flag_schedule_insns)
1467 /* Don't warn since it's on by default in -O2. */
1468 flag_schedule_insns = 0;
1471 if (optimize_size)
1473 arm_constant_limit = 1;
1475 /* If optimizing for size, bump the number of instructions that we
1476 are prepared to conditionally execute (even on a StrongARM). */
1477 max_insns_skipped = 6;
1479 else
1481 /* For processors with load scheduling, it never costs more than
1482 2 cycles to load a constant, and the load scheduler may well
1483 reduce that to 1. */
1484 if (arm_ld_sched)
1485 arm_constant_limit = 1;
1487 /* On XScale the longer latency of a load makes it more difficult
1488 to achieve a good schedule, so it's faster to synthesize
1489 constants that can be done in two insns. */
1490 if (arm_tune_xscale)
1491 arm_constant_limit = 2;
1493 /* StrongARM has early execution of branches, so a sequence
1494 that is worth skipping is shorter. */
1495 if (arm_tune_strongarm)
1496 max_insns_skipped = 3;
1499 /* Register global variables with the garbage collector. */
1500 arm_add_gc_roots ();
1503 static void
1504 arm_add_gc_roots (void)
1506 gcc_obstack_init(&minipool_obstack);
1507 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1510 /* A table of known ARM exception types.
1511 For use with the interrupt function attribute. */
1513 typedef struct
1515 const char *const arg;
1516 const unsigned long return_value;
1518 isr_attribute_arg;
1520 static const isr_attribute_arg isr_attribute_args [] =
1522 { "IRQ", ARM_FT_ISR },
1523 { "irq", ARM_FT_ISR },
1524 { "FIQ", ARM_FT_FIQ },
1525 { "fiq", ARM_FT_FIQ },
1526 { "ABORT", ARM_FT_ISR },
1527 { "abort", ARM_FT_ISR },
1528 { "ABORT", ARM_FT_ISR },
1529 { "abort", ARM_FT_ISR },
1530 { "UNDEF", ARM_FT_EXCEPTION },
1531 { "undef", ARM_FT_EXCEPTION },
1532 { "SWI", ARM_FT_EXCEPTION },
1533 { "swi", ARM_FT_EXCEPTION },
1534 { NULL, ARM_FT_NORMAL }
1537 /* Returns the (interrupt) function type of the current
1538 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1540 static unsigned long
1541 arm_isr_value (tree argument)
1543 const isr_attribute_arg * ptr;
1544 const char * arg;
1546 if (!arm_arch_notm)
1547 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1549 /* No argument - default to IRQ. */
1550 if (argument == NULL_TREE)
1551 return ARM_FT_ISR;
1553 /* Get the value of the argument. */
1554 if (TREE_VALUE (argument) == NULL_TREE
1555 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1556 return ARM_FT_UNKNOWN;
1558 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1560 /* Check it against the list of known arguments. */
1561 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1562 if (streq (arg, ptr->arg))
1563 return ptr->return_value;
1565 /* An unrecognized interrupt type. */
1566 return ARM_FT_UNKNOWN;
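/* Example of the attribute this parses (the handler name is arbitrary):

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value receives the argument list, matches "IRQ" against
   isr_attribute_args and returns ARM_FT_ISR; an unknown string yields
   ARM_FT_UNKNOWN.  */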
1569 /* Computes the type of the current function. */
1571 static unsigned long
1572 arm_compute_func_type (void)
1574 unsigned long type = ARM_FT_UNKNOWN;
1575 tree a;
1576 tree attr;
1578 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1580 /* Decide if the current function is volatile. Such functions
1581 never return, and many memory cycles can be saved by not storing
1582 register values that will never be needed again. This optimization
1583 was added to speed up context switching in a kernel application. */
1584 if (optimize > 0
1585 && (TREE_NOTHROW (current_function_decl)
1586 || !(flag_unwind_tables
1587 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1588 && TREE_THIS_VOLATILE (current_function_decl))
1589 type |= ARM_FT_VOLATILE;
1591 if (cfun->static_chain_decl != NULL)
1592 type |= ARM_FT_NESTED;
1594 attr = DECL_ATTRIBUTES (current_function_decl);
1596 a = lookup_attribute ("naked", attr);
1597 if (a != NULL_TREE)
1598 type |= ARM_FT_NAKED;
1600 a = lookup_attribute ("isr", attr);
1601 if (a == NULL_TREE)
1602 a = lookup_attribute ("interrupt", attr);
1604 if (a == NULL_TREE)
1605 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1606 else
1607 type |= arm_isr_value (TREE_VALUE (a));
1609 return type;
1612 /* Returns the type of the current function. */
1614 unsigned long
1615 arm_current_func_type (void)
1617 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1618 cfun->machine->func_type = arm_compute_func_type ();
1620 return cfun->machine->func_type;
1623 /* Return 1 if it is possible to return using a single instruction.
1624 If SIBLING is non-null, this is a test for a return before a sibling
1625 call. SIBLING is the call insn, so we can examine its register usage. */
1628 use_return_insn (int iscond, rtx sibling)
1630 int regno;
1631 unsigned int func_type;
1632 unsigned long saved_int_regs;
1633 unsigned HOST_WIDE_INT stack_adjust;
1634 arm_stack_offsets *offsets;
1636 /* Never use a return instruction before reload has run. */
1637 if (!reload_completed)
1638 return 0;
1640 func_type = arm_current_func_type ();
1642 /* Naked, volatile and stack alignment functions need special
1643 consideration. */
1644 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1645 return 0;
1647 /* So do interrupt functions that use the frame pointer and Thumb
1648 interrupt functions. */
1649 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1650 return 0;
1652 offsets = arm_get_frame_offsets ();
1653 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1655 /* As do variadic functions. */
1656 if (crtl->args.pretend_args_size
1657 || cfun->machine->uses_anonymous_args
1658 /* Or if the function calls __builtin_eh_return () */
1659 || crtl->calls_eh_return
1660 /* Or if the function calls alloca */
1661 || cfun->calls_alloca
1662 /* Or if there is a stack adjustment. However, if the stack pointer
1663 is saved on the stack, we can use a pre-incrementing stack load. */
1664 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
1665 && stack_adjust == 4)))
1666 return 0;
1668 saved_int_regs = offsets->saved_regs_mask;
1670 /* Unfortunately, the insn
1672 ldmib sp, {..., sp, ...}
1674 triggers a bug on most SA-110 based devices, such that the stack
1675 pointer won't be correctly restored if the instruction takes a
1676 page fault. We work around this problem by popping r3 along with
1677 the other registers, since that is never slower than executing
1678 another instruction.
1680 We test for !arm_arch5 here, because code for any architecture
1681 less than this could potentially be run on one of the buggy
1682 chips. */
1683 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1685 /* Validate that r3 is a call-clobbered register (always true in
1686 the default abi) ... */
1687 if (!call_used_regs[3])
1688 return 0;
1690 /* ... that it isn't being used for a return value ... */
1691 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1692 return 0;
1694 /* ... or for a tail-call argument ... */
1695 if (sibling)
1697 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1699 if (find_regno_fusage (sibling, USE, 3))
1700 return 0;
1703 /* ... and that there are no call-saved registers in r0-r2
1704 (always true in the default ABI). */
1705 if (saved_int_regs & 0x7)
1706 return 0;
1709 /* Can't be done if interworking with Thumb, and any registers have been
1710 stacked. */
1711 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1712 return 0;
1714 /* On StrongARM, conditional returns are expensive if they aren't
1715 taken and multiple registers have been stacked. */
1716 if (iscond && arm_tune_strongarm)
1718 /* Conditional return when just the LR is stored is a simple
1719 conditional-load instruction, that's not expensive. */
1720 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1721 return 0;
1723 if (flag_pic
1724 && arm_pic_register != INVALID_REGNUM
1725 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1726 return 0;
1729 /* If there are saved registers but the LR isn't saved, then we need
1730 two instructions for the return. */
1731 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1732 return 0;
1734 /* Can't be done if any of the FPA regs are pushed,
1735 since this also requires an insn. */
1736 if (TARGET_HARD_FLOAT && TARGET_FPA)
1737 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1739 return 0;
1741 /* Likewise VFP regs. */
1742 if (TARGET_HARD_FLOAT && TARGET_VFP)
1743 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1744 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1745 return 0;
1747 if (TARGET_REALLY_IWMMXT)
1748 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1749 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1750 return 0;
1752 return 1;
1755 /* Return TRUE if int I is a valid immediate ARM constant. */
1757 int
1758 const_ok_for_arm (HOST_WIDE_INT i)
1760 int lowbit;
1762 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1763 be all zero, or all one. */
1764 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1765 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1766 != ((~(unsigned HOST_WIDE_INT) 0)
1767 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1768 return FALSE;
1770 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1772 /* Fast return for 0 and small values. We must do this for zero, since
1773 the code below can't handle that one case. */
1774 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1775 return TRUE;
1777 /* Get the number of trailing zeros. */
1778 lowbit = ffs((int) i) - 1;
1780 /* Only even shifts are allowed in ARM mode so round down to the
1781 nearest even number. */
1782 if (TARGET_ARM)
1783 lowbit &= ~1;
1785 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1786 return TRUE;
1788 if (TARGET_ARM)
1790 /* Allow rotated constants in ARM mode. */
1791 if (lowbit <= 4
1792 && ((i & ~0xc000003f) == 0
1793 || (i & ~0xf000000f) == 0
1794 || (i & ~0xfc000003) == 0))
1795 return TRUE;
1797 else
1799 HOST_WIDE_INT v;
1801 /* Allow repeated pattern. */
1802 v = i & 0xff;
1803 v |= v << 16;
1804 if (i == v || i == (v | (v << 8)))
1805 return TRUE;
1808 return FALSE;
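/* A few concrete cases for ARM state: 0x000000ff, 0x0000ff00,
   0xff000000 and 0xf000000f are all valid immediates (an 8-bit value
   rotated right by an even amount), whereas 0x00000101 and 0x0001fe00
   are not, since no even rotation leaves at most eight significant
   bits.  In Thumb-2 state the replicated patterns accepted above also
   make values such as 0x00ab00ab and 0xabababab valid.  */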
1811 /* Return true if I is a valid constant for the operation CODE. */
1812 static int
1813 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1815 if (const_ok_for_arm (i))
1816 return 1;
1818 switch (code)
1820 case PLUS:
1821 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1823 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1824 case XOR:
1825 case IOR:
1826 return 0;
1828 case AND:
1829 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1831 default:
1832 gcc_unreachable ();
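/* For instance, PLUS with the constant -1 is acceptable even though
   0xffffffff is not a valid immediate, because the negated value 1 is;
   the addition is then emitted as "sub rD, rN, #1".  Likewise AND with
   0xffffff00 succeeds via the complemented value 0xff and is emitted
   as "bic rD, rN, #0xff".  */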
1836 /* Emit a sequence of insns to handle a large constant.
1837 CODE is the code of the operation required, it can be any of SET, PLUS,
1838 IOR, AND, XOR, MINUS;
1839 MODE is the mode in which the operation is being performed;
1840 VAL is the integer to operate on;
1841 SOURCE is the other operand (a register, or a null-pointer for SET);
1842 SUBTARGETS means it is safe to create scratch registers if that will
1843 either produce a simpler sequence, or we will want to cse the values.
1844 Return value is the number of insns emitted. */
1846 /* ??? Tweak this for thumb2. */
1847 int
1848 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1849 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1851 rtx cond;
1853 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1854 cond = COND_EXEC_TEST (PATTERN (insn));
1855 else
1856 cond = NULL_RTX;
1858 if (subtargets || code == SET
1859 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1860 && REGNO (target) != REGNO (source)))
1862 /* After arm_reorg has been called, we can't fix up expensive
1863 constants by pushing them into memory so we must synthesize
1864 them in-line, regardless of the cost. This is only likely to
1865 be more costly on chips that have load delay slots and we are
1866 compiling without running the scheduler (so no splitting
1867 occurred before the final instruction emission).
1869 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1870 */
1871 if (!after_arm_reorg
1872 && !cond
1873 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1874 1, 0)
1875 > arm_constant_limit + (code != SET)))
1877 if (code == SET)
1879 /* Currently SET is the only monadic value for CODE, all
1880 the rest are dyadic. */
1881 emit_set_insn (target, GEN_INT (val));
1882 return 1;
1884 else
1886 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1888 emit_set_insn (temp, GEN_INT (val));
1889 /* For MINUS, the value is subtracted from, since we never
1890 have subtraction of a constant. */
1891 if (code == MINUS)
1892 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1893 else
1894 emit_set_insn (target,
1895 gen_rtx_fmt_ee (code, mode, source, temp));
1896 return 2;
1901 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1902 1);
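/* As a worked example of the cost being compared against the limit:
   loading 0x12345678 in ARM state takes four instructions, one 8-bit
   rotated chunk at a time,

	mov	rD, #0x12000000
	orr	rD, rD, #0x00340000
	orr	rD, rD, #0x00005600
	orr	rD, rD, #0x00000078

   while 0x0000ffff needs two (mov #0xff00 then orr #0xff) and
   0xfffffffe only one (mvn rD, #1).  Constants judged more expensive
   than arm_constant_limit are instead left as a single load, to be
   placed in a minipool later by arm_reorg.  */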
1905 /* Return the number of ARM instructions required to synthesize the given
1906 constant. */
1907 static int
1908 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1910 HOST_WIDE_INT temp1;
1911 int num_insns = 0;
1912 do
1914 int end;
1916 if (i <= 0)
1917 i += 32;
1918 if (remainder & (3 << (i - 2)))
1920 end = i - 8;
1921 if (end < 0)
1922 end += 32;
1923 temp1 = remainder & ((0x0ff << end)
1924 | ((i < end) ? (0xff >> (32 - end)) : 0));
1925 remainder &= ~temp1;
1926 num_insns++;
1927 i -= 6;
1929 i -= 2;
1930 } while (remainder);
1931 return num_insns;
1934 /* Emit an instruction with the indicated PATTERN. If COND is
1935 non-NULL, conditionalize the execution of the instruction on COND
1936 being true. */
1938 static void
1939 emit_constant_insn (rtx cond, rtx pattern)
1941 if (cond)
1942 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1943 emit_insn (pattern);
1946 /* As above, but extra parameter GENERATE which, if clear, suppresses
1947 RTL generation. */
1948 /* ??? This needs more work for thumb2. */
1950 static int
1951 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1952 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1953 int generate)
1955 int can_invert = 0;
1956 int can_negate = 0;
1957 int can_negate_initial = 0;
1958 int can_shift = 0;
1959 int i;
1960 int num_bits_set = 0;
1961 int set_sign_bit_copies = 0;
1962 int clear_sign_bit_copies = 0;
1963 int clear_zero_bit_copies = 0;
1964 int set_zero_bit_copies = 0;
1965 int insns = 0;
1966 unsigned HOST_WIDE_INT temp1, temp2;
1967 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1969 /* Find out which operations are safe for a given CODE. Also do a quick
1970 check for degenerate cases; these can occur when DImode operations
1971 are split. */
1972 switch (code)
1974 case SET:
1975 can_invert = 1;
1976 can_shift = 1;
1977 can_negate = 1;
1978 break;
1980 case PLUS:
1981 can_negate = 1;
1982 can_negate_initial = 1;
1983 break;
1985 case IOR:
1986 if (remainder == 0xffffffff)
1988 if (generate)
1989 emit_constant_insn (cond,
1990 gen_rtx_SET (VOIDmode, target,
1991 GEN_INT (ARM_SIGN_EXTEND (val))));
1992 return 1;
1994 if (remainder == 0)
1996 if (reload_completed && rtx_equal_p (target, source))
1997 return 0;
1998 if (generate)
1999 emit_constant_insn (cond,
2000 gen_rtx_SET (VOIDmode, target, source));
2001 return 1;
2003 break;
2005 case AND:
2006 if (remainder == 0)
2008 if (generate)
2009 emit_constant_insn (cond,
2010 gen_rtx_SET (VOIDmode, target, const0_rtx));
2011 return 1;
2013 if (remainder == 0xffffffff)
2015 if (reload_completed && rtx_equal_p (target, source))
2016 return 0;
2017 if (generate)
2018 emit_constant_insn (cond,
2019 gen_rtx_SET (VOIDmode, target, source));
2020 return 1;
2022 can_invert = 1;
2023 break;
2025 case XOR:
2026 if (remainder == 0)
2028 if (reload_completed && rtx_equal_p (target, source))
2029 return 0;
2030 if (generate)
2031 emit_constant_insn (cond,
2032 gen_rtx_SET (VOIDmode, target, source));
2033 return 1;
2036 /* We don't know how to handle other cases yet. */
2037 gcc_assert (remainder == 0xffffffff);
2039 if (generate)
2040 emit_constant_insn (cond,
2041 gen_rtx_SET (VOIDmode, target,
2042 gen_rtx_NOT (mode, source)));
2043 return 1;
2045 case MINUS:
2046 /* We treat MINUS as (val - source), since (source - val) is always
2047 passed as (source + (-val)). */
2048 if (remainder == 0)
2050 if (generate)
2051 emit_constant_insn (cond,
2052 gen_rtx_SET (VOIDmode, target,
2053 gen_rtx_NEG (mode, source)));
2054 return 1;
2056 if (const_ok_for_arm (val))
2058 if (generate)
2059 emit_constant_insn (cond,
2060 gen_rtx_SET (VOIDmode, target,
2061 gen_rtx_MINUS (mode, GEN_INT (val),
2062 source)));
2063 return 1;
2065 can_negate = 1;
2067 break;
2069 default:
2070 gcc_unreachable ();
2073 /* If we can do it in one insn get out quickly. */
2074 if (const_ok_for_arm (val)
2075 || (can_negate_initial && const_ok_for_arm (-val))
2076 || (can_invert && const_ok_for_arm (~val)))
2078 if (generate)
2079 emit_constant_insn (cond,
2080 gen_rtx_SET (VOIDmode, target,
2081 (source
2082 ? gen_rtx_fmt_ee (code, mode, source,
2083 GEN_INT (val))
2084 : GEN_INT (val))));
2085 return 1;
2088 /* Calculate a few attributes that may be useful for specific
2089 optimizations. */
2090 for (i = 31; i >= 0; i--)
2092 if ((remainder & (1 << i)) == 0)
2093 clear_sign_bit_copies++;
2094 else
2095 break;
2098 for (i = 31; i >= 0; i--)
2100 if ((remainder & (1 << i)) != 0)
2101 set_sign_bit_copies++;
2102 else
2103 break;
2106 for (i = 0; i <= 31; i++)
2108 if ((remainder & (1 << i)) == 0)
2109 clear_zero_bit_copies++;
2110 else
2111 break;
2114 for (i = 0; i <= 31; i++)
2116 if ((remainder & (1 << i)) != 0)
2117 set_zero_bit_copies++;
2118 else
2119 break;
2122 switch (code)
2124 case SET:
2125 /* See if we can use movw. */
2126 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2128 if (generate)
2129 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2130 GEN_INT (val)));
2131 return 1;
2134 /* See if we can do this by sign_extending a constant that is known
2135 to be negative. This is a good way of doing it, since the shift
2136 may well merge into a subsequent insn. */
2137 if (set_sign_bit_copies > 1)
2139 if (const_ok_for_arm
2140 (temp1 = ARM_SIGN_EXTEND (remainder
2141 << (set_sign_bit_copies - 1))))
2143 if (generate)
2145 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2146 emit_constant_insn (cond,
2147 gen_rtx_SET (VOIDmode, new_src,
2148 GEN_INT (temp1)));
2149 emit_constant_insn (cond,
2150 gen_ashrsi3 (target, new_src,
2151 GEN_INT (set_sign_bit_copies - 1)));
2153 return 2;
2155 /* For an inverted constant, we will need to set the low bits;
2156 these will be shifted out of harm's way. */
2157 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2158 if (const_ok_for_arm (~temp1))
2160 if (generate)
2162 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2163 emit_constant_insn (cond,
2164 gen_rtx_SET (VOIDmode, new_src,
2165 GEN_INT (temp1)));
2166 emit_constant_insn (cond,
2167 gen_ashrsi3 (target, new_src,
2168 GEN_INT (set_sign_bit_copies - 1)));
2170 return 2;
2174 /* See if we can calculate the value as the difference between two
2175 valid immediates. */
2176 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2178 int topshift = clear_sign_bit_copies & ~1;
2180 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2181 & (0xff000000 >> topshift));
2183 /* If temp1 is zero, then that means the 9 most significant
2184 bits of remainder were 1 and we've caused it to overflow.
2185 When topshift is 0 we don't need to do anything since we
2186 can borrow from 'bit 32'. */
2187 if (temp1 == 0 && topshift != 0)
2188 temp1 = 0x80000000 >> (topshift - 1);
2190 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2192 if (const_ok_for_arm (temp2))
2194 if (generate)
2196 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2197 emit_constant_insn (cond,
2198 gen_rtx_SET (VOIDmode, new_src,
2199 GEN_INT (temp1)));
2200 emit_constant_insn (cond,
2201 gen_addsi3 (target, new_src,
2202 GEN_INT (-temp2)));
2205 return 2;
2209 /* See if we can generate this by setting the bottom (or the top)
2210 16 bits, and then shifting these into the other half of the
2211 word. We only look for the simplest cases; to do more would cost
2212 too much. Be careful, however, not to generate this when the
2213 alternative would take fewer insns. */
2214 if (val & 0xffff0000)
2216 temp1 = remainder & 0xffff0000;
2217 temp2 = remainder & 0x0000ffff;
2219 /* Overlaps outside this range are best done using other methods. */
2220 for (i = 9; i < 24; i++)
2222 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2223 && !const_ok_for_arm (temp2))
2225 rtx new_src = (subtargets
2226 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2227 : target);
2228 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2229 source, subtargets, generate);
2230 source = new_src;
2231 if (generate)
2232 emit_constant_insn
2233 (cond,
2234 gen_rtx_SET
2235 (VOIDmode, target,
2236 gen_rtx_IOR (mode,
2237 gen_rtx_ASHIFT (mode, source,
2238 GEN_INT (i)),
2239 source)));
2240 return insns + 1;
2244 /* Don't duplicate cases already considered. */
2245 for (i = 17; i < 24; i++)
2247 if (((temp1 | (temp1 >> i)) == remainder)
2248 && !const_ok_for_arm (temp1))
2250 rtx new_src = (subtargets
2251 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2252 : target);
2253 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2254 source, subtargets, generate);
2255 source = new_src;
2256 if (generate)
2257 emit_constant_insn
2258 (cond,
2259 gen_rtx_SET (VOIDmode, target,
2260 gen_rtx_IOR
2261 (mode,
2262 gen_rtx_LSHIFTRT (mode, source,
2263 GEN_INT (i)),
2264 source)));
2265 return insns + 1;
2269 break;
2271 case IOR:
2272 case XOR:
2273 /* If we have IOR or XOR, and the constant can be loaded in a
2274 single instruction, and we can find a temporary to put it in,
2275 then this can be done in two instructions instead of 3-4. */
2276 if (subtargets
2277 /* TARGET can't be NULL if SUBTARGETS is 0 */
2278 || (reload_completed && !reg_mentioned_p (target, source)))
2280 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2282 if (generate)
2284 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2286 emit_constant_insn (cond,
2287 gen_rtx_SET (VOIDmode, sub,
2288 GEN_INT (val)));
2289 emit_constant_insn (cond,
2290 gen_rtx_SET (VOIDmode, target,
2291 gen_rtx_fmt_ee (code, mode,
2292 source, sub)));
2294 return 2;
2298 if (code == XOR)
2299 break;
2301 if (set_sign_bit_copies > 8
2302 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2304 if (generate)
2306 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2307 rtx shift = GEN_INT (set_sign_bit_copies);
2309 emit_constant_insn
2310 (cond,
2311 gen_rtx_SET (VOIDmode, sub,
2312 gen_rtx_NOT (mode,
2313 gen_rtx_ASHIFT (mode,
2314 source,
2315 shift))));
2316 emit_constant_insn
2317 (cond,
2318 gen_rtx_SET (VOIDmode, target,
2319 gen_rtx_NOT (mode,
2320 gen_rtx_LSHIFTRT (mode, sub,
2321 shift))));
2323 return 2;
2326 if (set_zero_bit_copies > 8
2327 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2329 if (generate)
2331 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2332 rtx shift = GEN_INT (set_zero_bit_copies);
2334 emit_constant_insn
2335 (cond,
2336 gen_rtx_SET (VOIDmode, sub,
2337 gen_rtx_NOT (mode,
2338 gen_rtx_LSHIFTRT (mode,
2339 source,
2340 shift))));
2341 emit_constant_insn
2342 (cond,
2343 gen_rtx_SET (VOIDmode, target,
2344 gen_rtx_NOT (mode,
2345 gen_rtx_ASHIFT (mode, sub,
2346 shift))));
2348 return 2;
2351 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2353 if (generate)
2355 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2356 emit_constant_insn (cond,
2357 gen_rtx_SET (VOIDmode, sub,
2358 gen_rtx_NOT (mode, source)));
2359 source = sub;
2360 if (subtargets)
2361 sub = gen_reg_rtx (mode);
2362 emit_constant_insn (cond,
2363 gen_rtx_SET (VOIDmode, sub,
2364 gen_rtx_AND (mode, source,
2365 GEN_INT (temp1))));
2366 emit_constant_insn (cond,
2367 gen_rtx_SET (VOIDmode, target,
2368 gen_rtx_NOT (mode, sub)));
2370 return 3;
2372 break;
2374 case AND:
2375 /* See if two shifts will do 2 or more insn's worth of work. */
2376 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2378 HOST_WIDE_INT shift_mask = ((0xffffffff
2379 << (32 - clear_sign_bit_copies))
2380 & 0xffffffff);
2382 if ((remainder | shift_mask) != 0xffffffff)
2384 if (generate)
2386 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2387 insns = arm_gen_constant (AND, mode, cond,
2388 remainder | shift_mask,
2389 new_src, source, subtargets, 1);
2390 source = new_src;
2392 else
2394 rtx targ = subtargets ? NULL_RTX : target;
2395 insns = arm_gen_constant (AND, mode, cond,
2396 remainder | shift_mask,
2397 targ, source, subtargets, 0);
2401 if (generate)
2403 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2404 rtx shift = GEN_INT (clear_sign_bit_copies);
2406 emit_insn (gen_ashlsi3 (new_src, source, shift));
2407 emit_insn (gen_lshrsi3 (target, new_src, shift));
2410 return insns + 2;
2413 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2415 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2417 if ((remainder | shift_mask) != 0xffffffff)
2419 if (generate)
2421 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2423 insns = arm_gen_constant (AND, mode, cond,
2424 remainder | shift_mask,
2425 new_src, source, subtargets, 1);
2426 source = new_src;
2428 else
2430 rtx targ = subtargets ? NULL_RTX : target;
2432 insns = arm_gen_constant (AND, mode, cond,
2433 remainder | shift_mask,
2434 targ, source, subtargets, 0);
2438 if (generate)
2440 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2441 rtx shift = GEN_INT (clear_zero_bit_copies);
2443 emit_insn (gen_lshrsi3 (new_src, source, shift));
2444 emit_insn (gen_ashlsi3 (target, new_src, shift));
2447 return insns + 2;
2450 break;
2452 default:
2453 break;
2456 for (i = 0; i < 32; i++)
2457 if (remainder & (1 << i))
2458 num_bits_set++;
2460 if (code == AND || (can_invert && num_bits_set > 16))
2461 remainder = (~remainder) & 0xffffffff;
2462 else if (code == PLUS && num_bits_set > 16)
2463 remainder = (-remainder) & 0xffffffff;
2464 else
2466 can_invert = 0;
2467 can_negate = 0;
2470 /* Now try and find a way of doing the job in either two or three
2471 instructions.
2472 We start by looking for the largest block of zeros that are aligned on
2473 a 2-bit boundary, we then fill up the temps, wrapping around to the
2474 top of the word when we drop off the bottom.
2475 In the worst case this code should produce no more than four insns.
2476 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2477 best place to start. */
2479 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2480 the same. */
2482 int best_start = 0;
2483 if (!TARGET_THUMB2)
2485 int best_consecutive_zeros = 0;
2487 for (i = 0; i < 32; i += 2)
2489 int consecutive_zeros = 0;
2491 if (!(remainder & (3 << i)))
2493 while ((i < 32) && !(remainder & (3 << i)))
2495 consecutive_zeros += 2;
2496 i += 2;
2498 if (consecutive_zeros > best_consecutive_zeros)
2500 best_consecutive_zeros = consecutive_zeros;
2501 best_start = i - consecutive_zeros;
2503 i -= 2;
2507 /* So long as it won't require any more insns to do so, it's
2508 desirable to emit a small constant (in bits 0...9) in the last
2509 insn. This way there is more chance that it can be combined with
2510 a later addressing insn to form a pre-indexed load or store
2511 operation. Consider:
2513 *((volatile int *)0xe0000100) = 1;
2514 *((volatile int *)0xe0000110) = 2;
2516 We want this to wind up as:
2518 mov rA, #0xe0000000
2519 mov rB, #1
2520 str rB, [rA, #0x100]
2521 mov rB, #2
2522 str rB, [rA, #0x110]
2524 rather than having to synthesize both large constants from scratch.
2526 Therefore, we calculate how many insns would be required to emit
2527 the constant starting from `best_start', and also starting from
2528 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2529 yield a shorter sequence, we may as well use zero. */
2530 if (best_start != 0
2531 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2532 && (count_insns_for_constant (remainder, 0) <=
2533 count_insns_for_constant (remainder, best_start)))
2534 best_start = 0;
2537 /* Now start emitting the insns. */
2538 i = best_start;
2539 do
2541 int end;
2543 if (i <= 0)
2544 i += 32;
2545 if (remainder & (3 << (i - 2)))
2547 end = i - 8;
2548 if (end < 0)
2549 end += 32;
2550 temp1 = remainder & ((0x0ff << end)
2551 | ((i < end) ? (0xff >> (32 - end)) : 0));
2552 remainder &= ~temp1;
2554 if (generate)
2556 rtx new_src, temp1_rtx;
2558 if (code == SET || code == MINUS)
2560 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2561 if (can_invert && code != MINUS)
2562 temp1 = ~temp1;
2564 else
2566 if (remainder && subtargets)
2567 new_src = gen_reg_rtx (mode);
2568 else
2569 new_src = target;
2570 if (can_invert)
2571 temp1 = ~temp1;
2572 else if (can_negate)
2573 temp1 = -temp1;
2576 temp1 = trunc_int_for_mode (temp1, mode);
2577 temp1_rtx = GEN_INT (temp1);
2579 if (code == SET)
2580 ;
2581 else if (code == MINUS)
2582 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2583 else
2584 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2586 emit_constant_insn (cond,
2587 gen_rtx_SET (VOIDmode, new_src,
2588 temp1_rtx));
2589 source = new_src;
2592 if (code == SET)
2594 can_invert = 0;
2595 code = PLUS;
2597 else if (code == MINUS)
2598 code = PLUS;
2600 insns++;
2601 if (TARGET_ARM)
2602 i -= 6;
2603 else
2604 i -= 7;
2606 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2607 shifts. */
2608 if (TARGET_ARM)
2609 i -= 2;
2610 else
2611 i--;
2613 while (remainder);
2616 return insns;
2619 /* Canonicalize a comparison so that we are more likely to recognize it.
2620 This can be done for a few constant compares, where we can make the
2621 immediate value easier to load. */
2623 enum rtx_code
2624 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2625 rtx * op1)
2627 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2628 unsigned HOST_WIDE_INT maxval;
2629 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2631 switch (code)
2633 case EQ:
2634 case NE:
2635 return code;
2637 case GT:
2638 case LE:
2639 if (i != maxval
2640 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2642 *op1 = GEN_INT (i + 1);
2643 return code == GT ? GE : LT;
2645 break;
2647 case GE:
2648 case LT:
2649 if (i != ~maxval
2650 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2652 *op1 = GEN_INT (i - 1);
2653 return code == GE ? GT : LE;
2655 break;
2657 case GTU:
2658 case LEU:
2659 if (i != ~((unsigned HOST_WIDE_INT) 0)
2660 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2662 *op1 = GEN_INT (i + 1);
2663 return code == GTU ? GEU : LTU;
2665 break;
2667 case GEU:
2668 case LTU:
2669 if (i != 0
2670 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2672 *op1 = GEN_INT (i - 1);
2673 return code == GEU ? GTU : LEU;
2675 break;
2677 default:
2678 gcc_unreachable ();
2681 return code;
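/* Example: "r0 > 0x3ff" cannot use a cmp immediate directly because
   0x3ff is not a valid constant, but 0x400 is, so GT with 0x3ff is
   rewritten here as GE with 0x400.  The unsigned cases are adjusted
   the same way, with the guards above preventing wrap-around at zero
   and at the all-ones value.  */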
2685 /* Define how to find the value returned by a function. */
2687 rtx
2688 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2690 enum machine_mode mode;
2691 int unsignedp ATTRIBUTE_UNUSED;
2692 rtx r ATTRIBUTE_UNUSED;
2694 mode = TYPE_MODE (type);
2695 /* Promote integer types. */
2696 if (INTEGRAL_TYPE_P (type))
2697 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2699 /* Promotes small structs returned in a register to full-word size
2700 for big-endian AAPCS. */
2701 if (arm_return_in_msb (type))
2703 HOST_WIDE_INT size = int_size_in_bytes (type);
2704 if (size % UNITS_PER_WORD != 0)
2706 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2707 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2711 return LIBCALL_VALUE(mode);
2714 /* Determine the amount of memory needed to store the possible return
2715 registers of an untyped call. */
2716 int
2717 arm_apply_result_size (void)
2719 int size = 16;
2721 if (TARGET_ARM)
2723 if (TARGET_HARD_FLOAT_ABI)
2725 if (TARGET_FPA)
2726 size += 12;
2727 if (TARGET_MAVERICK)
2728 size += 8;
2730 if (TARGET_IWMMXT_ABI)
2731 size += 8;
2734 return size;
2737 /* Decide whether a type should be returned in memory (true)
2738 or in a register (false). This is called by the macro
2739 RETURN_IN_MEMORY. */
2740 int
2741 arm_return_in_memory (const_tree type)
2743 HOST_WIDE_INT size;
2745 size = int_size_in_bytes (type);
2747 /* Vector values should be returned using ARM registers, not memory (unless
2748 they're over 16 bytes, which will break since we only have four
2749 call-clobbered registers to play with). */
2750 if (TREE_CODE (type) == VECTOR_TYPE)
2751 return (size < 0 || size > (4 * UNITS_PER_WORD));
2753 if (!AGGREGATE_TYPE_P (type) &&
2754 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2755 /* All simple types are returned in registers.
2756 For AAPCS, complex types are treated the same as aggregates. */
2757 return 0;
2759 if (arm_abi != ARM_ABI_APCS)
2761 /* ATPCS and later return aggregate types in memory only if they are
2762 larger than a word (or are variable size). */
2763 return (size < 0 || size > UNITS_PER_WORD);
2766 /* For the arm-wince targets we choose to be compatible with Microsoft's
2767 ARM and Thumb compilers, which always return aggregates in memory. */
2768 #ifndef ARM_WINCE
2769 /* All structures/unions bigger than one word are returned in memory.
2770 Also catch the case where int_size_in_bytes returns -1. In this case
2771 the aggregate is either huge or of variable size, and in either case
2772 we will want to return it via memory and not in a register. */
2773 if (size < 0 || size > UNITS_PER_WORD)
2774 return 1;
2776 if (TREE_CODE (type) == RECORD_TYPE)
2778 tree field;
2780 /* For a struct the APCS says that we only return in a register
2781 if the type is 'integer like' and every addressable element
2782 has an offset of zero. For practical purposes this means
2783 that the structure can have at most one non bit-field element
2784 and that this element must be the first one in the structure. */
2786 /* Find the first field, ignoring non FIELD_DECL things which will
2787 have been created by C++. */
2788 for (field = TYPE_FIELDS (type);
2789 field && TREE_CODE (field) != FIELD_DECL;
2790 field = TREE_CHAIN (field))
2791 continue;
2793 if (field == NULL)
2794 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2796 /* Check that the first field is valid for returning in a register. */
2798 /* ... Floats are not allowed */
2799 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2800 return 1;
2802 /* ... Aggregates that are not themselves valid for returning in
2803 a register are not allowed. */
2804 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2805 return 1;
2807 /* Now check the remaining fields, if any. Only bitfields are allowed,
2808 since they are not addressable. */
2809 for (field = TREE_CHAIN (field);
2810 field;
2811 field = TREE_CHAIN (field))
2813 if (TREE_CODE (field) != FIELD_DECL)
2814 continue;
2816 if (!DECL_BIT_FIELD_TYPE (field))
2817 return 1;
2820 return 0;
2823 if (TREE_CODE (type) == UNION_TYPE)
2825 tree field;
2827 /* Unions can be returned in registers if every element is
2828 integral, or can be returned in an integer register. */
2829 for (field = TYPE_FIELDS (type);
2830 field;
2831 field = TREE_CHAIN (field))
2833 if (TREE_CODE (field) != FIELD_DECL)
2834 continue;
2836 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2837 return 1;
2839 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2840 return 1;
2843 return 0;
2845 #endif /* not ARM_WINCE */
2847 /* Return all other types in memory. */
2848 return 1;
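/* Concretely, under the old APCS rules "struct { int x; }" is
   integer-like and is returned in r0, while "struct { float f; }" and
   the two-word "struct { int a, b; }" are both returned through the
   address supplied by the caller.  For ATPCS and AAPCS the test above
   reduces to "larger than a word, or of variable size".  */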
2851 /* Indicate whether or not words of a double are in big-endian order. */
2853 int
2854 arm_float_words_big_endian (void)
2856 if (TARGET_MAVERICK)
2857 return 0;
2859 /* For FPA, float words are always big-endian. For VFP, floats words
2860 follow the memory system mode. */
2862 if (TARGET_FPA)
2864 return 1;
2867 if (TARGET_VFP)
2868 return (TARGET_BIG_END ? 1 : 0);
2870 return 1;
2873 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2874 for a call to a function whose data type is FNTYPE.
2875 For a library call, FNTYPE is NULL. */
2876 void
2877 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2878 rtx libname ATTRIBUTE_UNUSED,
2879 tree fndecl ATTRIBUTE_UNUSED)
2881 /* On the ARM, the offset starts at 0. */
2882 pcum->nregs = 0;
2883 pcum->iwmmxt_nregs = 0;
2884 pcum->can_split = true;
2886 /* Varargs vectors are treated the same as long long.
2887 named_count avoids having to change the way arm handles 'named' */
2888 pcum->named_count = 0;
2889 pcum->nargs = 0;
2891 if (TARGET_REALLY_IWMMXT && fntype)
2893 tree fn_arg;
2895 for (fn_arg = TYPE_ARG_TYPES (fntype);
2896 fn_arg;
2897 fn_arg = TREE_CHAIN (fn_arg))
2898 pcum->named_count += 1;
2900 if (! pcum->named_count)
2901 pcum->named_count = INT_MAX;
2906 /* Return true if mode/type need doubleword alignment. */
2907 bool
2908 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2910 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2911 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
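/* PARM_BOUNDARY is 32 on ARM, so with the AAPCS type alignments this
   holds for DImode and DFmode values and for any type carrying
   __attribute__ ((aligned (8))) or stricter; the callers below only
   consult it when ARM_DOUBLEWORD_ALIGN is in force.  */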
2915 /* Determine where to put an argument to a function.
2916 Value is zero to push the argument on the stack,
2917 or a hard register in which to store the argument.
2919 MODE is the argument's machine mode.
2920 TYPE is the data type of the argument (as a tree).
2921 This is null for libcalls where that information may
2922 not be available.
2923 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2924 the preceding args and about the function being called.
2925 NAMED is nonzero if this argument is a named parameter
2926 (otherwise it is an extra parameter matching an ellipsis). */
2928 rtx
2929 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2930 tree type, int named)
2932 int nregs;
2934 /* Varargs vectors are treated the same as long long.
2935 named_count avoids having to change the way arm handles 'named' */
2936 if (TARGET_IWMMXT_ABI
2937 && arm_vector_mode_supported_p (mode)
2938 && pcum->named_count > pcum->nargs + 1)
2940 if (pcum->iwmmxt_nregs <= 9)
2941 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2942 else
2944 pcum->can_split = false;
2945 return NULL_RTX;
2949 /* Put doubleword aligned quantities in even register pairs. */
2950 if (pcum->nregs & 1
2951 && ARM_DOUBLEWORD_ALIGN
2952 && arm_needs_doubleword_align (mode, type))
2953 pcum->nregs++;
2955 if (mode == VOIDmode)
2956 /* Pick an arbitrary value for operand 2 of the call insn. */
2957 return const0_rtx;
2959 /* Only allow splitting an arg between regs and memory if all preceding
2960 args were allocated to regs. For args passed by reference we only count
2961 the reference pointer. */
2962 if (pcum->can_split)
2963 nregs = 1;
2964 else
2965 nregs = ARM_NUM_REGS2 (mode, type);
2967 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2968 return NULL_RTX;
2970 return gen_rtx_REG (mode, pcum->nregs);
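/* For example, on an AAPCS target with software floating point,
   "void f (int, double)" passes the int in r0, skips r1 so that the
   doubleword-aligned double lands in the even pair r2/r3, and any
   further argument goes on the stack.  Under the old APCS the same
   double would simply use r1/r2, since ARM_DOUBLEWORD_ALIGN does not
   hold there.  */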
2973 static int
2974 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2975 tree type, bool named ATTRIBUTE_UNUSED)
2977 int nregs = pcum->nregs;
2979 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2980 return 0;
2982 if (NUM_ARG_REGS > nregs
2983 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2984 && pcum->can_split)
2985 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2987 return 0;
2990 /* Variable sized types are passed by reference. This is a GCC
2991 extension to the ARM ABI. */
2993 static bool
2994 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2995 enum machine_mode mode ATTRIBUTE_UNUSED,
2996 const_tree type, bool named ATTRIBUTE_UNUSED)
2998 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3001 /* Encode the current state of the #pragma [no_]long_calls. */
3002 typedef enum
3004 OFF, /* No #pragma [no_]long_calls is in effect. */
3005 LONG, /* #pragma long_calls is in effect. */
3006 SHORT /* #pragma no_long_calls is in effect. */
3007 } arm_pragma_enum;
3009 static arm_pragma_enum arm_pragma_long_calls = OFF;
3011 void
3012 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3014 arm_pragma_long_calls = LONG;
3017 void
3018 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3020 arm_pragma_long_calls = SHORT;
3023 void
3024 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3026 arm_pragma_long_calls = OFF;
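/* Typical usage in a header (declarations invented for illustration):

       #pragma long_calls
       void flash_erase (void);        (receives the long_call attribute)
       #pragma no_long_calls
       void sram_memcpy (void);        (receives short_call)
       #pragma long_calls_off
                                       (back to the command-line default)

   arm_set_default_type_attributes() below consults
   arm_pragma_long_calls when the function types for such declarations
   are created.  */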
3029 /* Table of machine attributes. */
3030 const struct attribute_spec arm_attribute_table[] =
3032 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3033 /* Function calls made to this symbol must be done indirectly, because
3034 it may lie outside of the 26 bit addressing range of a normal function
3035 call. */
3036 { "long_call", 0, 0, false, true, true, NULL },
3037 /* Whereas these functions are always known to reside within the 26 bit
3038 addressing range. */
3039 { "short_call", 0, 0, false, true, true, NULL },
3040 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3041 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3042 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3043 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3044 #ifdef ARM_PE
3045 /* ARM/PE has three new attributes:
3046 interfacearm - ?
3047 dllexport - for exporting a function/variable that will live in a dll
3048 dllimport - for importing a function/variable from a dll
3050 Microsoft allows multiple declspecs in one __declspec, separating
3051 them with spaces. We do NOT support this. Instead, use __declspec
3052 multiple times.
3053 */
3054 { "dllimport", 0, 0, true, false, false, NULL },
3055 { "dllexport", 0, 0, true, false, false, NULL },
3056 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3057 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3058 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3059 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3060 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3061 #endif
3062 { NULL, 0, 0, false, false, false, NULL }
3065 /* Handle an attribute requiring a FUNCTION_DECL;
3066 arguments as in struct attribute_spec.handler. */
3067 static tree
3068 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3069 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3071 if (TREE_CODE (*node) != FUNCTION_DECL)
3073 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3074 IDENTIFIER_POINTER (name));
3075 *no_add_attrs = true;
3078 return NULL_TREE;
3081 /* Handle an "interrupt" or "isr" attribute;
3082 arguments as in struct attribute_spec.handler. */
3083 static tree
3084 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3085 bool *no_add_attrs)
3087 if (DECL_P (*node))
3089 if (TREE_CODE (*node) != FUNCTION_DECL)
3091 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3092 IDENTIFIER_POINTER (name));
3093 *no_add_attrs = true;
3095 /* FIXME: the argument if any is checked for type attributes;
3096 should it be checked for decl ones? */
3098 else
3100 if (TREE_CODE (*node) == FUNCTION_TYPE
3101 || TREE_CODE (*node) == METHOD_TYPE)
3103 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3105 warning (OPT_Wattributes, "%qs attribute ignored",
3106 IDENTIFIER_POINTER (name));
3107 *no_add_attrs = true;
3110 else if (TREE_CODE (*node) == POINTER_TYPE
3111 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3112 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3113 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3115 *node = build_variant_type_copy (*node);
3116 TREE_TYPE (*node) = build_type_attribute_variant
3117 (TREE_TYPE (*node),
3118 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3119 *no_add_attrs = true;
3121 else
3123 /* Possibly pass this attribute on from the type to a decl. */
3124 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3125 | (int) ATTR_FLAG_FUNCTION_NEXT
3126 | (int) ATTR_FLAG_ARRAY_NEXT))
3128 *no_add_attrs = true;
3129 return tree_cons (name, args, NULL_TREE);
3131 else
3133 warning (OPT_Wattributes, "%qs attribute ignored",
3134 IDENTIFIER_POINTER (name));
3139 return NULL_TREE;
3142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3143 /* Handle the "notshared" attribute. This attribute is another way of
3144 requesting hidden visibility. ARM's compiler supports
3145 "__declspec(notshared)"; we support the same thing via an
3146 attribute. */
3148 static tree
3149 arm_handle_notshared_attribute (tree *node,
3150 tree name ATTRIBUTE_UNUSED,
3151 tree args ATTRIBUTE_UNUSED,
3152 int flags ATTRIBUTE_UNUSED,
3153 bool *no_add_attrs)
3155 tree decl = TYPE_NAME (*node);
3157 if (decl)
3159 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3160 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3161 *no_add_attrs = false;
3163 return NULL_TREE;
3165 #endif
3167 /* Return 0 if the attributes for two types are incompatible, 1 if they
3168 are compatible, and 2 if they are nearly compatible (which causes a
3169 warning to be generated). */
3170 static int
3171 arm_comp_type_attributes (const_tree type1, const_tree type2)
3173 int l1, l2, s1, s2;
3175 /* Check for mismatch of non-default calling convention. */
3176 if (TREE_CODE (type1) != FUNCTION_TYPE)
3177 return 1;
3179 /* Check for mismatched call attributes. */
3180 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3181 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3182 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3183 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3185 /* Only bother to check if an attribute is defined. */
3186 if (l1 | l2 | s1 | s2)
3188 /* If one type has an attribute, the other must have the same attribute. */
3189 if ((l1 != l2) || (s1 != s2))
3190 return 0;
3192 /* Disallow mixed attributes. */
3193 if ((l1 & s2) || (l2 & s1))
3194 return 0;
3197 /* Check for mismatched ISR attribute. */
3198 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3199 if (! l1)
3200 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3201 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3202 if (! l2)
3203 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3204 if (l1 != l2)
3205 return 0;
3207 return 1;
3210 /* Assigns default attributes to newly defined type. This is used to
3211 set short_call/long_call attributes for function types of
3212 functions defined inside corresponding #pragma scopes. */
3213 static void
3214 arm_set_default_type_attributes (tree type)
3216 /* Add __attribute__ ((long_call)) to all functions, when
3217 inside #pragma long_calls or __attribute__ ((short_call)),
3218 when inside #pragma no_long_calls. */
3219 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3221 tree type_attr_list, attr_name;
3222 type_attr_list = TYPE_ATTRIBUTES (type);
3224 if (arm_pragma_long_calls == LONG)
3225 attr_name = get_identifier ("long_call");
3226 else if (arm_pragma_long_calls == SHORT)
3227 attr_name = get_identifier ("short_call");
3228 else
3229 return;
3231 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3232 TYPE_ATTRIBUTES (type) = type_attr_list;
3236 /* Return true if DECL is known to be linked into section SECTION. */
3238 static bool
3239 arm_function_in_section_p (tree decl, section *section)
3241 /* We can only be certain about functions defined in the same
3242 compilation unit. */
3243 if (!TREE_STATIC (decl))
3244 return false;
3246 /* Make sure that SYMBOL always binds to the definition in this
3247 compilation unit. */
3248 if (!targetm.binds_local_p (decl))
3249 return false;
3251 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3252 if (!DECL_SECTION_NAME (decl))
3254 /* Only cater for unit-at-a-time mode, where we know that the user
3255 cannot later specify a section for DECL. */
3256 if (!flag_unit_at_a_time)
3257 return false;
3259 /* Make sure that we will not create a unique section for DECL. */
3260 if (flag_function_sections || DECL_ONE_ONLY (decl))
3261 return false;
3264 return function_section (decl) == section;
3267 /* Return nonzero if a 32-bit "long_call" should be generated for
3268 a call from the current function to DECL. We generate a long_call
3269 if the function:
3271 a. has an __attribute__((long call))
3272 or b. is within the scope of a #pragma long_calls
3273 or c. the -mlong-calls command line switch has been specified
3275 However we do not generate a long call if the function:
3277 d. has an __attribute__ ((short_call))
3278 or e. is inside the scope of a #pragma no_long_calls
3279 or f. is defined in the same section as the current function. */
3281 bool
3282 arm_is_long_call_p (tree decl)
3284 tree attrs;
3286 if (!decl)
3287 return TARGET_LONG_CALLS;
3289 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3290 if (lookup_attribute ("short_call", attrs))
3291 return false;
3293 /* For "f", be conservative, and only cater for cases in which the
3294 whole of the current function is placed in the same section. */
3295 if (!flag_reorder_blocks_and_partition
3296 && arm_function_in_section_p (decl, current_function_section ()))
3297 return false;
3299 if (lookup_attribute ("long_call", attrs))
3300 return true;
3302 return TARGET_LONG_CALLS;
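/* For instance (function names purely illustrative):

       extern void far_away (void) __attribute__ ((long_call));
       extern void nearby (void) __attribute__ ((short_call));

   A call to far_away is expanded to load the full 32-bit address into
   a register even without -mlong-calls, while a call to nearby always
   uses a plain BL, overriding -mlong-calls and any enclosing
   #pragma long_calls.  */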
3305 /* Return nonzero if it is ok to make a tail-call to DECL. */
3306 static bool
3307 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3309 unsigned long func_type;
3311 if (cfun->machine->sibcall_blocked)
3312 return false;
3314 /* Never tailcall something for which we have no decl, or if we
3315 are in Thumb mode. */
3316 if (decl == NULL || TARGET_THUMB)
3317 return false;
3319 /* The PIC register is live on entry to VxWorks PLT entries, so we
3320 must make the call before restoring the PIC register. */
3321 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3322 return false;
3324 /* Cannot tail-call to long calls, since these are out of range of
3325 a branch instruction. */
3326 if (arm_is_long_call_p (decl))
3327 return false;
3329 /* If we are interworking and the function is not declared static
3330 then we can't tail-call it unless we know that it exists in this
3331 compilation unit (since it might be a Thumb routine). */
3332 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3333 return false;
3335 func_type = arm_current_func_type ();
3336 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3337 if (IS_INTERRUPT (func_type))
3338 return false;
3340 /* Never tailcall if function may be called with a misaligned SP. */
3341 if (IS_STACKALIGN (func_type))
3342 return false;
3344 /* Everything else is ok. */
3345 return true;
3349 /* Addressing mode support functions. */
3351 /* Return nonzero if X is a legitimate immediate operand when compiling
3352 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3353 int
3354 legitimate_pic_operand_p (rtx x)
3356 if (GET_CODE (x) == SYMBOL_REF
3357 || (GET_CODE (x) == CONST
3358 && GET_CODE (XEXP (x, 0)) == PLUS
3359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3360 return 0;
3362 return 1;
3365 /* Record that the current function needs a PIC register. Initialize
3366 cfun->machine->pic_reg if we have not already done so. */
3368 static void
3369 require_pic_register (void)
3371 /* A lot of the logic here is made obscure by the fact that this
3372 routine gets called as part of the rtx cost estimation process.
3373 We don't want those calls to affect any assumptions about the real
3374 function; and further, we can't call entry_of_function() until we
3375 start the real expansion process. */
3376 if (!crtl->uses_pic_offset_table)
3378 gcc_assert (can_create_pseudo_p ());
3379 if (arm_pic_register != INVALID_REGNUM)
3381 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3383 /* Play games to avoid marking the function as needing pic
3384 if we are being called as part of the cost-estimation
3385 process. */
3386 if (current_ir_type () != IR_GIMPLE)
3387 crtl->uses_pic_offset_table = 1;
3389 else
3391 rtx seq;
3393 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3395 /* Play games to avoid marking the function as needing pic
3396 if we are being called as part of the cost-estimation
3397 process. */
3398 if (current_ir_type () != IR_GIMPLE)
3400 crtl->uses_pic_offset_table = 1;
3401 start_sequence ();
3403 arm_load_pic_register (0UL);
3405 seq = get_insns ();
3406 end_sequence ();
3407 emit_insn_after (seq, entry_of_function ());
3414 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3416 if (GET_CODE (orig) == SYMBOL_REF
3417 || GET_CODE (orig) == LABEL_REF)
3419 rtx pic_ref, address;
3420 rtx insn;
3421 int subregs = 0;
3423 /* If this function doesn't have a pic register, create one now. */
3424 require_pic_register ();
3426 if (reg == 0)
3428 gcc_assert (can_create_pseudo_p ());
3429 reg = gen_reg_rtx (Pmode);
3431 subregs = 1;
3434 if (subregs)
3435 address = gen_reg_rtx (Pmode);
3436 else
3437 address = reg;
3439 if (TARGET_ARM)
3440 emit_insn (gen_pic_load_addr_arm (address, orig));
3441 else if (TARGET_THUMB2)
3442 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3443 else /* TARGET_THUMB1 */
3444 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3446 /* VxWorks does not impose a fixed gap between segments; the run-time
3447 gap can be different from the object-file gap. We therefore can't
3448 use GOTOFF unless we are absolutely sure that the symbol is in the
3449 same segment as the GOT. Unfortunately, the flexibility of linker
3450 scripts means that we can't be sure of that in general, so assume
3451 that GOTOFF is never valid on VxWorks. */
3452 if ((GET_CODE (orig) == LABEL_REF
3453 || (GET_CODE (orig) == SYMBOL_REF &&
3454 SYMBOL_REF_LOCAL_P (orig)))
3455 && NEED_GOT_RELOC
3456 && !TARGET_VXWORKS_RTP)
3457 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3458 else
3460 pic_ref = gen_const_mem (Pmode,
3461 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3462 address));
3465 insn = emit_move_insn (reg, pic_ref);
3467 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3468 by loop. */
3469 set_unique_reg_note (insn, REG_EQUAL, orig);
3471 return reg;
3473 else if (GET_CODE (orig) == CONST)
3475 rtx base, offset;
3477 if (GET_CODE (XEXP (orig, 0)) == PLUS
3478 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3479 return orig;
3481 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3482 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3483 return orig;
3485 if (reg == 0)
3487 gcc_assert (can_create_pseudo_p ());
3488 reg = gen_reg_rtx (Pmode);
3491 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3493 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3494 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3495 base == reg ? 0 : reg);
3497 if (GET_CODE (offset) == CONST_INT)
3499 /* The base register doesn't really matter, we only want to
3500 test the index for the appropriate mode. */
3501 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3503 gcc_assert (can_create_pseudo_p ());
3504 offset = force_reg (Pmode, offset);
3507 if (GET_CODE (offset) == CONST_INT)
3508 return plus_constant (base, INTVAL (offset));
3511 if (GET_MODE_SIZE (mode) > 4
3512 && (GET_MODE_CLASS (mode) == MODE_INT
3513 || TARGET_SOFT_FLOAT))
3515 emit_insn (gen_addsi3 (reg, base, offset));
3516 return reg;
3519 return gen_rtx_PLUS (Pmode, base, offset);
3522 return orig;
3526 /* Find a spare register to use during the prolog of a function. */
3528 static int
3529 thumb_find_work_register (unsigned long pushed_regs_mask)
3531 int reg;
3533 /* Check the argument registers first as these are call-used. The
3534 register allocation order means that sometimes r3 might be used
3535 but earlier argument registers might not, so check them all. */
3536 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3537 if (!df_regs_ever_live_p (reg))
3538 return reg;
3540 /* Before going on to check the call-saved registers we can try a couple
3541 more ways of deducing that r3 is available. The first is when we are
3542 pushing anonymous arguments onto the stack and we have fewer than 4
3543 registers' worth of fixed arguments(*). In this case r3 will be part of
3544 the variable argument list and so we can be sure that it will be
3545 pushed right at the start of the function. Hence it will be available
3546 for the rest of the prologue.
3547 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
3548 if (cfun->machine->uses_anonymous_args
3549 && crtl->args.pretend_args_size > 0)
3550 return LAST_ARG_REGNUM;
3552 /* The other case is when we have fixed arguments but fewer than 4 registers'
3553 worth. In this case r3 might be used in the body of the function, but
3554 it is not being used to convey an argument into the function. In theory
3555 we could just check crtl->args.size to see how many bytes are
3556 being passed in argument registers, but it seems that it is unreliable.
3557 Sometimes it will have the value 0 when in fact arguments are being
3558 passed. (See testcase execute/20021111-1.c for an example). So we also
3559 check the args_info.nregs field as well. The problem with this field is
3560 that it makes no allowances for arguments that are passed to the
3561 function but which are not used. Hence we could miss an opportunity
3562 when a function has an unused argument in r3. But it is better to be
3563 safe than to be sorry. */
3564 if (! cfun->machine->uses_anonymous_args
3565 && crtl->args.size >= 0
3566 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3567 && crtl->args.info.nregs < 4)
3568 return LAST_ARG_REGNUM;
3570 /* Otherwise look for a call-saved register that is going to be pushed. */
3571 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3572 if (pushed_regs_mask & (1 << reg))
3573 return reg;
3575 if (TARGET_THUMB2)
3577 /* Thumb-2 can use high regs. */
3578 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3579 if (pushed_regs_mask & (1 << reg))
3580 return reg;
3582 /* Something went wrong - thumb_compute_save_reg_mask()
3583 should have arranged for a suitable register to be pushed. */
3584 gcc_unreachable ();
3587 static GTY(()) int pic_labelno;
3589 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3590 low register. */
3592 void
3593 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3595 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3596 rtx global_offset_table;
3598 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3599 return;
3601 gcc_assert (flag_pic);
3603 pic_reg = cfun->machine->pic_reg;
3604 if (TARGET_VXWORKS_RTP)
3606 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3607 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3608 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3610 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3612 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3613 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3615 else
3617 /* We use an UNSPEC rather than a LABEL_REF because this label
3618 never appears in the code stream. */
3620 labelno = GEN_INT (pic_labelno++);
3621 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3622 l1 = gen_rtx_CONST (VOIDmode, l1);
3624 global_offset_table
3625 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3626 /* On the ARM the PC register contains 'dot + 8' at the time of the
3627 addition, on the Thumb it is 'dot + 4'. */
3628 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3629 if (GOT_PCREL)
3631 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3632 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3634 else
3635 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3637 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3638 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3640 if (TARGET_ARM)
3642 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3643 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3645 else if (TARGET_THUMB2)
3647 /* Thumb-2 only allows very limited access to the PC. Calculate the
3648 address in a temporary register. */
3649 if (arm_pic_register != INVALID_REGNUM)
3651 pic_tmp = gen_rtx_REG (SImode,
3652 thumb_find_work_register (saved_regs));
3654 else
3656 gcc_assert (can_create_pseudo_p ());
3657 pic_tmp = gen_reg_rtx (Pmode);
3660 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3661 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3662 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3664 else /* TARGET_THUMB1 */
3666 if (arm_pic_register != INVALID_REGNUM
3667 && REGNO (pic_reg) > LAST_LO_REGNUM)
3669 /* We will have pushed the pic register, so we should always be
3670 able to find a work register. */
3671 pic_tmp = gen_rtx_REG (SImode,
3672 thumb_find_work_register (saved_regs));
3673 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3674 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3676 else
3677 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3678 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3682 /* Need to emit this whether or not we obey regdecls,
3683 since setjmp/longjmp can cause life info to screw up. */
3684 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
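/* For the usual ARM-state, non-VxWorks case the code emitted here
   amounts to something like (labels and register purely illustrative)

	ldr	r4, .LCP
   .LPIC0:
	add	r4, pc, r4
	...
   .LCP:
	.word	_GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   i.e. the literal records the GOT's distance from the add, and the
   "+ 8" accounts for the PC read-ahead noted above.  */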
3688 /* Return nonzero if X is valid as an ARM state addressing register. */
3689 static int
3690 arm_address_register_rtx_p (rtx x, int strict_p)
3692 int regno;
3694 if (GET_CODE (x) != REG)
3695 return 0;
3697 regno = REGNO (x);
3699 if (strict_p)
3700 return ARM_REGNO_OK_FOR_BASE_P (regno);
3702 return (regno <= LAST_ARM_REGNUM
3703 || regno >= FIRST_PSEUDO_REGISTER
3704 || regno == FRAME_POINTER_REGNUM
3705 || regno == ARG_POINTER_REGNUM);
3708 /* Return TRUE if this rtx is the difference of a symbol and a label,
3709 and will reduce to a PC-relative relocation in the object file.
3710 Expressions like this can be left alone when generating PIC, rather
3711 than forced through the GOT. */
3712 static int
3713 pcrel_constant_p (rtx x)
3715 if (GET_CODE (x) == MINUS)
3716 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3718 return FALSE;
3721 /* Return nonzero if X is a valid ARM state address operand. */
3722 int
3723 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3724 int strict_p)
3726 bool use_ldrd;
3727 enum rtx_code code = GET_CODE (x);
3729 if (arm_address_register_rtx_p (x, strict_p))
3730 return 1;
3732 use_ldrd = (TARGET_LDRD
3733 && (mode == DImode
3734 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3736 if (code == POST_INC || code == PRE_DEC
3737 || ((code == PRE_INC || code == POST_DEC)
3738 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3739 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3741 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3742 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3743 && GET_CODE (XEXP (x, 1)) == PLUS
3744 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3746 rtx addend = XEXP (XEXP (x, 1), 1);
3748 /* Don't allow ldrd post increment by register because it's hard
3749 to fixup invalid register choices. */
3750 if (use_ldrd
3751 && GET_CODE (x) == POST_MODIFY
3752 && GET_CODE (addend) == REG)
3753 return 0;
3755 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3756 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3759 /* After reload constants split into minipools will have addresses
3760 from a LABEL_REF. */
3761 else if (reload_completed
3762 && (code == LABEL_REF
3763 || (code == CONST
3764 && GET_CODE (XEXP (x, 0)) == PLUS
3765 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3766 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3767 return 1;
3769 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3770 return 0;
3772 else if (code == PLUS)
3774 rtx xop0 = XEXP (x, 0);
3775 rtx xop1 = XEXP (x, 1);
3777 return ((arm_address_register_rtx_p (xop0, strict_p)
3778 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3779 || (arm_address_register_rtx_p (xop1, strict_p)
3780 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3783 #if 0
3784 /* Reload currently can't handle MINUS, so disable this for now */
3785 else if (GET_CODE (x) == MINUS)
3787 rtx xop0 = XEXP (x, 0);
3788 rtx xop1 = XEXP (x, 1);
3790 return (arm_address_register_rtx_p (xop0, strict_p)
3791 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3793 #endif
3795 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3796 && code == SYMBOL_REF
3797 && CONSTANT_POOL_ADDRESS_P (x)
3798 && ! (flag_pic
3799 && symbol_mentioned_p (get_pool_constant (x))
3800 && ! pcrel_constant_p (get_pool_constant (x))))
3801 return 1;
3803 return 0;
3806 /* Return nonzero if X is a valid Thumb-2 address operand. */
3808 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3810 bool use_ldrd;
3811 enum rtx_code code = GET_CODE (x);
3813 if (arm_address_register_rtx_p (x, strict_p))
3814 return 1;
3816 use_ldrd = (TARGET_LDRD
3817 && (mode == DImode
3818 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3820 if (code == POST_INC || code == PRE_DEC
3821 || ((code == PRE_INC || code == POST_DEC)
3822 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3823 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3825 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3826 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3827 && GET_CODE (XEXP (x, 1)) == PLUS
3828 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3830 /* Thumb-2 only has autoincrement by constant. */
3831 rtx addend = XEXP (XEXP (x, 1), 1);
3832 HOST_WIDE_INT offset;
3834 if (GET_CODE (addend) != CONST_INT)
3835 return 0;
3837 offset = INTVAL (addend);
3838 if (GET_MODE_SIZE (mode) <= 4)
3839 return (offset > -256 && offset < 256);
3841 return (use_ldrd && offset > -1024 && offset < 1024
3842 && (offset & 3) == 0);
3845 /* After reload constants split into minipools will have addresses
3846 from a LABEL_REF. */
3847 else if (reload_completed
3848 && (code == LABEL_REF
3849 || (code == CONST
3850 && GET_CODE (XEXP (x, 0)) == PLUS
3851 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3852 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3853 return 1;
3855 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3856 return 0;
3858 else if (code == PLUS)
3860 rtx xop0 = XEXP (x, 0);
3861 rtx xop1 = XEXP (x, 1);
3863 return ((arm_address_register_rtx_p (xop0, strict_p)
3864 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3865 || (arm_address_register_rtx_p (xop1, strict_p)
3866 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3869 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3870 && code == SYMBOL_REF
3871 && CONSTANT_POOL_ADDRESS_P (x)
3872 && ! (flag_pic
3873 && symbol_mentioned_p (get_pool_constant (x))
3874 && ! pcrel_constant_p (get_pool_constant (x))))
3875 return 1;
3877 return 0;
3880 /* Return nonzero if INDEX is valid for an address index operand in
3881 ARM state. */
3882 static int
3883 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3884 int strict_p)
3886 HOST_WIDE_INT range;
3887 enum rtx_code code = GET_CODE (index);
3889 /* Standard coprocessor addressing modes. */
3890 if (TARGET_HARD_FLOAT
3891 && (TARGET_FPA || TARGET_MAVERICK)
3892 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3893 || (TARGET_MAVERICK && mode == DImode)))
3894 return (code == CONST_INT && INTVAL (index) < 1024
3895 && INTVAL (index) > -1024
3896 && (INTVAL (index) & 3) == 0);
3898 if (TARGET_NEON
3899 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3900 return (code == CONST_INT
3901 && INTVAL (index) < 1016
3902 && INTVAL (index) > -1024
3903 && (INTVAL (index) & 3) == 0);
3905 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3906 return (code == CONST_INT
3907 && INTVAL (index) < 1024
3908 && INTVAL (index) > -1024
3909 && (INTVAL (index) & 3) == 0);
3911 if (arm_address_register_rtx_p (index, strict_p)
3912 && (GET_MODE_SIZE (mode) <= 4))
3913 return 1;
3915 if (mode == DImode || mode == DFmode)
3917 if (code == CONST_INT)
3919 HOST_WIDE_INT val = INTVAL (index);
3921 if (TARGET_LDRD)
3922 return val > -256 && val < 256;
3923 else
3924 return val > -4096 && val < 4092;
3927 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3930 if (GET_MODE_SIZE (mode) <= 4
3931 && ! (arm_arch4
3932 && (mode == HImode
3933 || (mode == QImode && outer == SIGN_EXTEND))))
3935 if (code == MULT)
3937 rtx xiop0 = XEXP (index, 0);
3938 rtx xiop1 = XEXP (index, 1);
3940 return ((arm_address_register_rtx_p (xiop0, strict_p)
3941 && power_of_two_operand (xiop1, SImode))
3942 || (arm_address_register_rtx_p (xiop1, strict_p)
3943 && power_of_two_operand (xiop0, SImode)));
3945 else if (code == LSHIFTRT || code == ASHIFTRT
3946 || code == ASHIFT || code == ROTATERT)
3948 rtx op = XEXP (index, 1);
3950 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3951 && GET_CODE (op) == CONST_INT
3952 && INTVAL (op) > 0
3953 && INTVAL (op) <= 31);
3957 /* For ARM v4 we may be doing a sign-extend operation during the
3958 load. */
3959 if (arm_arch4)
3961 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3962 range = 256;
3963 else
3964 range = 4096;
3966 else
3967 range = (mode == HImode) ? 4095 : 4096;
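/* With arm_arch4 this yields the 8-bit LDRH/LDRSH/LDRSB offset range
   (-255 .. 255) for halfword and sign-extended byte accesses, and the
   12-bit LDR/LDRB range (-4095 .. 4095) for everything else.  */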
3969 return (code == CONST_INT
3970 && INTVAL (index) < range
3971 && INTVAL (index) > -range);
3974 /* Return true if OP is a valid index scaling factor for Thumb-2 address
3975 index operand. i.e. 1, 2, 4 or 8. */
3976 static bool
3977 thumb2_index_mul_operand (rtx op)
3979 HOST_WIDE_INT val;
3981 if (GET_CODE (op) != CONST_INT)
3982 return false;
3984 val = INTVAL (op);
3985 return (val == 1 || val == 2 || val == 4 || val == 8);
3988 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
3989 static int
3990 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
3992 enum rtx_code code = GET_CODE (index);
3994 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
3995 /* Standard coprocessor addressing modes. */
3996 if (TARGET_HARD_FLOAT
3997 && (TARGET_FPA || TARGET_MAVERICK)
3998 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3999 || (TARGET_MAVERICK && mode == DImode)))
4000 return (code == CONST_INT && INTVAL (index) < 1024
4001 && INTVAL (index) > -1024
4002 && (INTVAL (index) & 3) == 0);
4004 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4006 /* For DImode assume values will usually live in core regs
4007 and only allow LDRD addressing modes. */
4008 if (!TARGET_LDRD || mode != DImode)
4009 return (code == CONST_INT
4010 && INTVAL (index) < 1024
4011 && INTVAL (index) > -1024
4012 && (INTVAL (index) & 3) == 0);
4015 if (TARGET_NEON
4016 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4017 return (code == CONST_INT
4018 && INTVAL (index) < 1016
4019 && INTVAL (index) > -1024
4020 && (INTVAL (index) & 3) == 0);
4022 if (arm_address_register_rtx_p (index, strict_p)
4023 && (GET_MODE_SIZE (mode) <= 4))
4024 return 1;
4026 if (mode == DImode || mode == DFmode)
4028 HOST_WIDE_INT val;
4029 /* ??? Can we assume ldrd for thumb2? */
4030 /* Thumb-2 ldrd only has reg+const addressing modes. */
4031 if (code != CONST_INT)
4032 return 0;
4033 val = INTVAL (index);
4034 /* ldrd supports offsets of +-1020.
4035 However the ldr fallback does not. */
4036 return val > -256 && val < 256 && (val & 3) == 0;
4039 if (code == MULT)
4041 rtx xiop0 = XEXP (index, 0);
4042 rtx xiop1 = XEXP (index, 1);
4044 return ((arm_address_register_rtx_p (xiop0, strict_p)
4045 && thumb2_index_mul_operand (xiop1))
4046 || (arm_address_register_rtx_p (xiop1, strict_p)
4047 && thumb2_index_mul_operand (xiop0)));
4049 else if (code == ASHIFT)
4051 rtx op = XEXP (index, 1);
4053 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4054 && GET_CODE (op) == CONST_INT
4055 && INTVAL (op) > 0
4056 && INTVAL (op) <= 3);
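/* Otherwise accept the plain Thumb-2 immediate offset forms: a 12-bit
   positive offset (0 .. 4095) or an 8-bit negative offset (-255 .. -1).  */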
4059 return (code == CONST_INT
4060 && INTVAL (index) < 4096
4061 && INTVAL (index) > -256);
4064 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4065 static int
4066 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4068 int regno;
4070 if (GET_CODE (x) != REG)
4071 return 0;
4073 regno = REGNO (x);
4075 if (strict_p)
4076 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4078 return (regno <= LAST_LO_REGNUM
4079 || regno > LAST_VIRTUAL_REGISTER
4080 || regno == FRAME_POINTER_REGNUM
4081 || (GET_MODE_SIZE (mode) >= 4
4082 && (regno == STACK_POINTER_REGNUM
4083 || regno >= FIRST_PSEUDO_REGISTER
4084 || x == hard_frame_pointer_rtx
4085 || x == arg_pointer_rtx)));
4088 /* Return nonzero if x is a legitimate index register. This is the case
4089 for any base register that can access a QImode object. */
4090 inline static int
4091 thumb1_index_register_rtx_p (rtx x, int strict_p)
4093 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4096 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4098 The AP may be eliminated to either the SP or the FP, so we use the
4099 least common denominator, i.e. SImode, and offsets from 0 to 64.
4101 ??? Verify whether the above is the right approach.
4103 ??? Also, the FP may be eliminated to the SP, so perhaps that
4104 needs special handling also.
4106 ??? Look at how the mips16 port solves this problem. It probably uses
4107 better ways to solve some of these problems.
4109 Although it is not incorrect, we don't accept QImode and HImode
4110 addresses based on the frame pointer or arg pointer until the
4111 reload pass starts. This is so that eliminating such addresses
4112 into stack based ones won't produce impossible code. */
4114 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4116 /* ??? Not clear if this is right. Experiment. */
4117 if (GET_MODE_SIZE (mode) < 4
4118 && !(reload_in_progress || reload_completed)
4119 && (reg_mentioned_p (frame_pointer_rtx, x)
4120 || reg_mentioned_p (arg_pointer_rtx, x)
4121 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4122 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4123 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4124 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4125 return 0;
4127 /* Accept any base register. SP only in SImode or larger. */
4128 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4129 return 1;
4131 /* This is PC relative data before arm_reorg runs. */
4132 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4133 && GET_CODE (x) == SYMBOL_REF
4134 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4135 return 1;
4137 /* This is PC relative data after arm_reorg runs. */
4138 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4139 && (GET_CODE (x) == LABEL_REF
4140 || (GET_CODE (x) == CONST
4141 && GET_CODE (XEXP (x, 0)) == PLUS
4142 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4143 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4144 return 1;
4146 /* Post-inc indexing only supported for SImode and larger. */
4147 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4148 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4149 return 1;
4151 else if (GET_CODE (x) == PLUS)
4153 /* REG+REG address can be any two index registers. */
4154 /* We disallow FRAME+REG addressing since we know that FRAME
4155 will be replaced with STACK, and SP relative addressing only
4156 permits SP+OFFSET. */
4157 if (GET_MODE_SIZE (mode) <= 4
4158 && XEXP (x, 0) != frame_pointer_rtx
4159 && XEXP (x, 1) != frame_pointer_rtx
4160 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4161 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4162 return 1;
4164 /* REG+const has 5-7 bit offset for non-SP registers. */
4165 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4166 || XEXP (x, 0) == arg_pointer_rtx)
4167 && GET_CODE (XEXP (x, 1)) == CONST_INT
4168 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4169 return 1;
4171 /* REG+const has 10-bit offset for SP, but only SImode and
4172 larger is supported. */
4173 /* ??? Should probably check for DI/DFmode overflow here
4174 just like GO_IF_LEGITIMATE_OFFSET does. */
4175 else if (GET_CODE (XEXP (x, 0)) == REG
4176 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4177 && GET_MODE_SIZE (mode) >= 4
4178 && GET_CODE (XEXP (x, 1)) == CONST_INT
4179 && INTVAL (XEXP (x, 1)) >= 0
4180 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4181 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4182 return 1;
4184 else if (GET_CODE (XEXP (x, 0)) == REG
4185 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4186 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4187 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4188 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4189 && GET_MODE_SIZE (mode) >= 4
4190 && GET_CODE (XEXP (x, 1)) == CONST_INT
4191 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4192 return 1;
4195 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4196 && GET_MODE_SIZE (mode) == 4
4197 && GET_CODE (x) == SYMBOL_REF
4198 && CONSTANT_POOL_ADDRESS_P (x)
4199 && ! (flag_pic
4200 && symbol_mentioned_p (get_pool_constant (x))
4201 && ! pcrel_constant_p (get_pool_constant (x))))
4202 return 1;
4204 return 0;
4207 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4208 instruction of mode MODE. */
4210 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4212 switch (GET_MODE_SIZE (mode))
4214 case 1:
4215 return val >= 0 && val < 32;
4217 case 2:
4218 return val >= 0 && val < 64 && (val & 1) == 0;
4220 default:
4221 return (val >= 0
4222 && (val + GET_MODE_SIZE (mode)) <= 128
4223 && (val & 3) == 0);
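/* I.e. the legal offsets are 0-31 for bytes, 0-62 (even) for halfwords, and
   word-aligned offsets up to 128 minus the access size (0-124 for SImode),
   matching the 5-bit scaled immediate field of the 16-bit load/store
   encodings.  */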
4227 /* Build the SYMBOL_REF for __tls_get_addr. */
4229 static GTY(()) rtx tls_get_addr_libfunc;
4231 static rtx
4232 get_tls_get_addr (void)
4234 if (!tls_get_addr_libfunc)
4235 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4236 return tls_get_addr_libfunc;
4239 static rtx
4240 arm_load_tp (rtx target)
4242 if (!target)
4243 target = gen_reg_rtx (SImode);
4245 if (TARGET_HARD_TP)
4247 /* Can return in any reg. */
4248 emit_insn (gen_load_tp_hard (target));
4250 else
4252 /* Always returned in r0. Immediately copy the result into a pseudo,
4253 otherwise other uses of r0 (e.g. setting up function arguments) may
4254 clobber the value. */
4256 rtx tmp;
4258 emit_insn (gen_load_tp_soft ());
4260 tmp = gen_rtx_REG (SImode, 0);
4261 emit_move_insn (target, tmp);
4263 return target;
4266 static rtx
4267 load_tls_operand (rtx x, rtx reg)
4269 rtx tmp;
4271 if (reg == NULL_RTX)
4272 reg = gen_reg_rtx (SImode);
4274 tmp = gen_rtx_CONST (SImode, x);
4276 emit_move_insn (reg, tmp);
4278 return reg;
4281 static rtx
4282 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4284 rtx insns, label, labelno, sum;
4286 start_sequence ();
4288 labelno = GEN_INT (pic_labelno++);
4289 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4290 label = gen_rtx_CONST (VOIDmode, label);
4292 sum = gen_rtx_UNSPEC (Pmode,
4293 gen_rtvec (4, x, GEN_INT (reloc), label,
4294 GEN_INT (TARGET_ARM ? 8 : 4)),
4295 UNSPEC_TLS);
4296 reg = load_tls_operand (sum, reg);
4298 if (TARGET_ARM)
4299 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4300 else if (TARGET_THUMB2)
4302 rtx tmp;
4303 /* Thumb-2 only allows very limited access to the PC. Calculate
4304 the address in a temporary register. */
4305 tmp = gen_reg_rtx (SImode);
4306 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4307 emit_insn (gen_addsi3 (reg, reg, tmp));
4309 else /* TARGET_THUMB1 */
4310 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4312 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4313 Pmode, 1, reg, Pmode);
4315 insns = get_insns ();
4316 end_sequence ();
4318 return insns;
4322 legitimize_tls_address (rtx x, rtx reg)
4324 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4325 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4327 switch (model)
4329 case TLS_MODEL_GLOBAL_DYNAMIC:
4330 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4331 dest = gen_reg_rtx (Pmode);
4332 emit_libcall_block (insns, dest, ret, x);
4333 return dest;
4335 case TLS_MODEL_LOCAL_DYNAMIC:
4336 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4338 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4339 share the LDM result with other LD model accesses. */
4340 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4341 UNSPEC_TLS);
4342 dest = gen_reg_rtx (Pmode);
4343 emit_libcall_block (insns, dest, ret, eqv);
4345 /* Load the addend. */
4346 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4347 UNSPEC_TLS);
4348 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4349 return gen_rtx_PLUS (Pmode, dest, addend);
4351 case TLS_MODEL_INITIAL_EXEC:
4352 labelno = GEN_INT (pic_labelno++);
4353 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4354 label = gen_rtx_CONST (VOIDmode, label);
4355 sum = gen_rtx_UNSPEC (Pmode,
4356 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4357 GEN_INT (TARGET_ARM ? 8 : 4)),
4358 UNSPEC_TLS);
4359 reg = load_tls_operand (sum, reg);
4361 if (TARGET_ARM)
4362 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4363 else if (TARGET_THUMB2)
4365 rtx tmp;
4366 /* Thumb-2 only allows very limited access to the PC. Calculate
4367 the address in a temporary register. */
4368 tmp = gen_reg_rtx (SImode);
4369 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4370 emit_insn (gen_addsi3 (reg, reg, tmp));
4371 emit_move_insn (reg, gen_const_mem (SImode, reg));
4373 else
4375 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4376 emit_move_insn (reg, gen_const_mem (SImode, reg));
4379 tp = arm_load_tp (NULL_RTX);
4381 return gen_rtx_PLUS (Pmode, tp, reg);
4383 case TLS_MODEL_LOCAL_EXEC:
4384 tp = arm_load_tp (NULL_RTX);
4386 reg = gen_rtx_UNSPEC (Pmode,
4387 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4388 UNSPEC_TLS);
4389 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4391 return gen_rtx_PLUS (Pmode, tp, reg);
4393 default:
4394 abort ();
4398 /* Try machine-dependent ways of modifying an illegitimate address
4399 to be legitimate. If we find one, return the new, valid address. */
4401 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4403 if (arm_tls_symbol_p (x))
4404 return legitimize_tls_address (x, NULL_RTX);
4406 if (GET_CODE (x) == PLUS)
4408 rtx xop0 = XEXP (x, 0);
4409 rtx xop1 = XEXP (x, 1);
4411 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4412 xop0 = force_reg (SImode, xop0);
4414 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4415 xop1 = force_reg (SImode, xop1);
4417 if (ARM_BASE_REGISTER_RTX_P (xop0)
4418 && GET_CODE (xop1) == CONST_INT)
4420 HOST_WIDE_INT n, low_n;
4421 rtx base_reg, val;
4422 n = INTVAL (xop1);
4424 /* VFP addressing modes actually allow greater offsets, but for
4425 now we just stick with the lowest common denominator. */
4426 if (mode == DImode
4427 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4429 low_n = n & 0x0f;
4430 n &= ~0x0f;
4431 if (low_n > 4)
4433 n += 16;
4434 low_n -= 16;
4437 else
4439 low_n = ((mode) == TImode ? 0
4440 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4441 n -= low_n;
4444 base_reg = gen_reg_rtx (SImode);
4445 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4446 emit_move_insn (base_reg, val);
4447 x = plus_constant (base_reg, low_n);
4449 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4450 x = gen_rtx_PLUS (SImode, xop0, xop1);
4453 /* XXX We don't allow MINUS any more -- see comment in
4454 arm_legitimate_address_p (). */
4455 else if (GET_CODE (x) == MINUS)
4457 rtx xop0 = XEXP (x, 0);
4458 rtx xop1 = XEXP (x, 1);
4460 if (CONSTANT_P (xop0))
4461 xop0 = force_reg (SImode, xop0);
4463 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4464 xop1 = force_reg (SImode, xop1);
4466 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4467 x = gen_rtx_MINUS (SImode, xop0, xop1);
4470 /* Make sure to take full advantage of the pre-indexed addressing mode
4471 with absolute addresses, which often allows the base register to
4472 be shared between multiple adjacent memory references, and it might
4473 even allow the minipool to be avoided entirely. */
4474 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4476 unsigned int bits;
4477 HOST_WIDE_INT mask, base, index;
4478 rtx base_reg;
4480 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
4481 use an 8-bit index. So let's use a 12-bit index for SImode only and
4482 hope that arm_gen_constant will enable ldrb to use more bits. */
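/* For example, the SImode address 0x0003ff74 is split into a base of
   0x0003f000, which is forced into a register, plus a 12-bit index of
   0xf74.  When the masked base has many bits set, the code below switches
   to base | mask with a negative index so that arm_gen_constant is given a
   cheaper constant to build.  */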
4483 bits = (mode == SImode) ? 12 : 8;
4484 mask = (1 << bits) - 1;
4485 base = INTVAL (x) & ~mask;
4486 index = INTVAL (x) & mask;
4487 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4489 /* It'll most probably be more efficient to generate the base
4490 with more bits set and use a negative index instead. */
4491 base |= mask;
4492 index -= mask;
4494 base_reg = force_reg (SImode, GEN_INT (base));
4495 x = plus_constant (base_reg, index);
4498 if (flag_pic)
4500 /* We need to find and carefully transform any SYMBOL and LABEL
4501 references; so go back to the original address expression. */
4502 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4504 if (new_x != orig_x)
4505 x = new_x;
4508 return x;
4512 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4513 to be legitimate. If we find one, return the new, valid address. */
4515 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4517 if (arm_tls_symbol_p (x))
4518 return legitimize_tls_address (x, NULL_RTX);
4520 if (GET_CODE (x) == PLUS
4521 && GET_CODE (XEXP (x, 1)) == CONST_INT
4522 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4523 || INTVAL (XEXP (x, 1)) < 0))
4525 rtx xop0 = XEXP (x, 0);
4526 rtx xop1 = XEXP (x, 1);
4527 HOST_WIDE_INT offset = INTVAL (xop1);
4529 /* Try and fold the offset into a biasing of the base register and
4530 then offsetting that. Don't do this when optimizing for space
4531 since it can cause too many CSEs. */
4532 if (optimize_size && offset >= 0
4533 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4535 HOST_WIDE_INT delta;
4537 if (offset >= 256)
4538 delta = offset - (256 - GET_MODE_SIZE (mode));
4539 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4540 delta = 31 * GET_MODE_SIZE (mode);
4541 else
4542 delta = offset & (~31 * GET_MODE_SIZE (mode));
4544 xop0 = force_operand (plus_constant (xop0, offset - delta),
4545 NULL_RTX);
4546 x = plus_constant (xop0, delta);
4548 else if (offset < 0 && offset > -256)
4549 /* Small negative offsets are best done with a subtract before the
4550 dereference; forcing these into a register normally takes two
4551 instructions. */
4552 x = force_operand (x, NULL_RTX);
4553 else
4555 /* For the remaining cases, force the constant into a register. */
4556 xop1 = force_reg (SImode, xop1);
4557 x = gen_rtx_PLUS (SImode, xop0, xop1);
4560 else if (GET_CODE (x) == PLUS
4561 && s_register_operand (XEXP (x, 1), SImode)
4562 && !s_register_operand (XEXP (x, 0), SImode))
4564 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4566 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4569 if (flag_pic)
4571 /* We need to find and carefully transform any SYMBOL and LABEL
4572 references; so go back to the original address expression. */
4573 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4575 if (new_x != orig_x)
4576 x = new_x;
4579 return x;
4583 thumb_legitimize_reload_address (rtx *x_p,
4584 enum machine_mode mode,
4585 int opnum, int type,
4586 int ind_levels ATTRIBUTE_UNUSED)
4588 rtx x = *x_p;
4590 if (GET_CODE (x) == PLUS
4591 && GET_MODE_SIZE (mode) < 4
4592 && REG_P (XEXP (x, 0))
4593 && XEXP (x, 0) == stack_pointer_rtx
4594 && GET_CODE (XEXP (x, 1)) == CONST_INT
4595 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4597 rtx orig_x = x;
4599 x = copy_rtx (x);
4600 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4601 Pmode, VOIDmode, 0, 0, opnum, type);
4602 return x;
4605 /* If both registers are hi-regs, then it's better to reload the
4606 entire expression rather than each register individually. That
4607 only requires one reload register rather than two. */
4608 if (GET_CODE (x) == PLUS
4609 && REG_P (XEXP (x, 0))
4610 && REG_P (XEXP (x, 1))
4611 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4612 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4614 rtx orig_x = x;
4616 x = copy_rtx (x);
4617 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4618 Pmode, VOIDmode, 0, 0, opnum, type);
4619 return x;
4622 return NULL;
4625 /* Test for various thread-local symbols. */
4627 /* Return TRUE if X is a thread-local symbol. */
4629 static bool
4630 arm_tls_symbol_p (rtx x)
4632 if (! TARGET_HAVE_TLS)
4633 return false;
4635 if (GET_CODE (x) != SYMBOL_REF)
4636 return false;
4638 return SYMBOL_REF_TLS_MODEL (x) != 0;
4641 /* Helper for arm_tls_referenced_p. */
4643 static int
4644 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4646 if (GET_CODE (*x) == SYMBOL_REF)
4647 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4649 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4650 TLS offsets, not real symbol references. */
4651 if (GET_CODE (*x) == UNSPEC
4652 && XINT (*x, 1) == UNSPEC_TLS)
4653 return -1;
4655 return 0;
4658 /* Return TRUE if X contains any TLS symbol references. */
4660 bool
4661 arm_tls_referenced_p (rtx x)
4663 if (! TARGET_HAVE_TLS)
4664 return false;
4666 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4669 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4671 bool
4672 arm_cannot_force_const_mem (rtx x)
4674 rtx base, offset;
4676 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4678 split_const (x, &base, &offset);
4679 if (GET_CODE (base) == SYMBOL_REF
4680 && !offset_within_block_p (base, INTVAL (offset)))
4681 return true;
4683 return arm_tls_referenced_p (x);
4686 #define REG_OR_SUBREG_REG(X) \
4687 (GET_CODE (X) == REG \
4688 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4690 #define REG_OR_SUBREG_RTX(X) \
4691 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4693 #ifndef COSTS_N_INSNS
4694 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4695 #endif
4696 static inline int
4697 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4699 enum machine_mode mode = GET_MODE (x);
4701 switch (code)
4703 case ASHIFT:
4704 case ASHIFTRT:
4705 case LSHIFTRT:
4706 case ROTATERT:
4707 case PLUS:
4708 case MINUS:
4709 case COMPARE:
4710 case NEG:
4711 case NOT:
4712 return COSTS_N_INSNS (1);
4714 case MULT:
4715 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4717 int cycles = 0;
4718 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
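/* Approximate the number of two-bit steps the multiplier needs: e.g. a
   constant of 100 (seven significant bits) adds four cycles.  */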
4720 while (i)
4722 i >>= 2;
4723 cycles++;
4725 return COSTS_N_INSNS (2) + cycles;
4727 return COSTS_N_INSNS (1) + 16;
4729 case SET:
4730 return (COSTS_N_INSNS (1)
4731 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4732 + (GET_CODE (SET_DEST (x)) == MEM)));
4734 case CONST_INT:
4735 if (outer == SET)
4737 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4738 return 0;
4739 if (thumb_shiftable_const (INTVAL (x)))
4740 return COSTS_N_INSNS (2);
4741 return COSTS_N_INSNS (3);
4743 else if ((outer == PLUS || outer == COMPARE)
4744 && INTVAL (x) < 256 && INTVAL (x) > -256)
4745 return 0;
4746 else if (outer == AND
4747 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4748 return COSTS_N_INSNS (1);
4749 else if (outer == ASHIFT || outer == ASHIFTRT
4750 || outer == LSHIFTRT)
4751 return 0;
4752 return COSTS_N_INSNS (2);
4754 case CONST:
4755 case CONST_DOUBLE:
4756 case LABEL_REF:
4757 case SYMBOL_REF:
4758 return COSTS_N_INSNS (3);
4760 case UDIV:
4761 case UMOD:
4762 case DIV:
4763 case MOD:
4764 return 100;
4766 case TRUNCATE:
4767 return 99;
4769 case AND:
4770 case XOR:
4771 case IOR:
4772 /* XXX guess. */
4773 return 8;
4775 case MEM:
4776 /* XXX another guess. */
4777 /* Memory costs quite a lot for the first word, but subsequent words
4778 load at the equivalent of a single insn each. */
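/* E.g. an SImode load costs 10, a DImode load 14, and an SImode load from
   the constant pool 14.  */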
4779 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4780 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4781 ? 4 : 0));
4783 case IF_THEN_ELSE:
4784 /* XXX a guess. */
4785 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4786 return 14;
4787 return 2;
4789 case ZERO_EXTEND:
4790 /* XXX still guessing. */
4791 switch (GET_MODE (XEXP (x, 0)))
4793 case QImode:
4794 return (1 + (mode == DImode ? 4 : 0)
4795 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4797 case HImode:
4798 return (4 + (mode == DImode ? 4 : 0)
4799 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4801 case SImode:
4802 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4804 default:
4805 return 99;
4808 default:
4809 return 99;
4814 /* Worker routine for arm_rtx_costs. */
4815 /* ??? This needs updating for thumb2. */
4816 static inline int
4817 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4819 enum machine_mode mode = GET_MODE (x);
4820 enum rtx_code subcode;
4821 int extra_cost;
4823 switch (code)
4825 case MEM:
4826 /* Memory costs quite a lot for the first word, but subsequent words
4827 load at the equivalent of a single insn each. */
4828 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4829 + (GET_CODE (x) == SYMBOL_REF
4830 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4832 case DIV:
4833 case MOD:
4834 case UDIV:
4835 case UMOD:
4836 return optimize_size ? COSTS_N_INSNS (2) : 100;
4838 case ROTATE:
4839 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4840 return 4;
4841 /* Fall through */
4842 case ROTATERT:
4843 if (mode != SImode)
4844 return 8;
4845 /* Fall through */
4846 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4847 if (mode == DImode)
4848 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4849 + ((GET_CODE (XEXP (x, 0)) == REG
4850 || (GET_CODE (XEXP (x, 0)) == SUBREG
4851 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4852 ? 0 : 8));
4853 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4854 || (GET_CODE (XEXP (x, 0)) == SUBREG
4855 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4856 ? 0 : 4)
4857 + ((GET_CODE (XEXP (x, 1)) == REG
4858 || (GET_CODE (XEXP (x, 1)) == SUBREG
4859 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4860 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4861 ? 0 : 4));
4863 case MINUS:
4864 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4866 extra_cost = rtx_cost (XEXP (x, 1), code);
4867 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4868 extra_cost += 4 * ARM_NUM_REGS (mode);
4869 return extra_cost;
4872 if (mode == DImode)
4873 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4874 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4875 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4876 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4877 ? 0 : 8));
4879 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4880 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4881 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4882 && arm_const_double_rtx (XEXP (x, 1))))
4883 ? 0 : 8)
4884 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4885 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4886 && arm_const_double_rtx (XEXP (x, 0))))
4887 ? 0 : 8));
4889 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4890 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4891 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4892 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4893 || subcode == ASHIFTRT || subcode == LSHIFTRT
4894 || subcode == ROTATE || subcode == ROTATERT
4895 || (subcode == MULT
4896 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4897 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4898 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4899 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4900 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4901 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4902 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4903 return 1;
4904 /* Fall through */
4906 case PLUS:
4907 if (arm_arch6 && mode == SImode
4908 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4909 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4910 return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
4911 + (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
4913 if (GET_CODE (XEXP (x, 0)) == MULT)
4915 extra_cost = rtx_cost (XEXP (x, 0), code);
4916 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4917 extra_cost += 4 * ARM_NUM_REGS (mode);
4918 return extra_cost;
4921 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4922 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4923 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4924 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4925 && arm_const_double_rtx (XEXP (x, 1))))
4926 ? 0 : 8));
4928 /* Fall through */
4929 case AND: case XOR: case IOR:
4930 extra_cost = 0;
4932 /* Normally the frame registers will be split into reg+const during
4933 reload, so it is a bad idea to combine them with other instructions,
4934 since then they might not be moved outside of loops. As a compromise
4935 we allow integration with ops that have a constant as their second
4936 operand. */
4937 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4938 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4939 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4940 || (REG_OR_SUBREG_REG (XEXP (x, 1))
4941 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
4942 extra_cost = 4;
4944 if (mode == DImode)
4945 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4946 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4947 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4948 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4949 ? 0 : 8));
4951 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4952 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4953 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4954 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4955 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4956 ? 0 : 4));
4958 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4959 return (1 + extra_cost
4960 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4961 || subcode == LSHIFTRT || subcode == ASHIFTRT
4962 || subcode == ROTATE || subcode == ROTATERT
4963 || (subcode == MULT
4964 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4965 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4966 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4967 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4968 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4969 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4970 ? 0 : 4));
4972 return 8;
4974 case MULT:
4975 /* This should have been handled by the CPU specific routines. */
4976 gcc_unreachable ();
4978 case TRUNCATE:
4979 if (arm_arch3m && mode == SImode
4980 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4981 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4982 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4983 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4984 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4985 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4986 return 8;
4987 return 99;
4989 case NEG:
4990 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4991 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4992 /* Fall through */
4993 case NOT:
4994 if (mode == DImode)
4995 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4997 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4999 case IF_THEN_ELSE:
5000 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
5001 return 14;
5002 return 2;
5004 case COMPARE:
5005 return 1;
5007 case ABS:
5008 return 4 + (mode == DImode ? 4 : 0);
5010 case SIGN_EXTEND:
5011 if (arm_arch_thumb2 && mode == SImode)
5012 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5014 if (GET_MODE (XEXP (x, 0)) == QImode)
5015 return (4 + (mode == DImode ? 4 : 0)
5016 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5017 /* Fall through */
5018 case ZERO_EXTEND:
5019 if (arm_arch6 && mode == SImode)
5020 return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
5022 switch (GET_MODE (XEXP (x, 0)))
5024 case QImode:
5025 return (1 + (mode == DImode ? 4 : 0)
5026 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5028 case HImode:
5029 return (4 + (mode == DImode ? 4 : 0)
5030 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5032 case SImode:
5033 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5035 case V8QImode:
5036 case V4HImode:
5037 case V2SImode:
5038 case V4QImode:
5039 case V2HImode:
5040 return 1;
5042 default:
5043 gcc_unreachable ();
5045 gcc_unreachable ();
5047 case CONST_INT:
5048 if (const_ok_for_arm (INTVAL (x)))
5049 return outer == SET ? 2 : -1;
5050 else if (outer == AND
5051 && const_ok_for_arm (~INTVAL (x)))
5052 return -1;
5053 else if ((outer == COMPARE
5054 || outer == PLUS || outer == MINUS)
5055 && const_ok_for_arm (-INTVAL (x)))
5056 return -1;
5057 else
5058 return 5;
5060 case CONST:
5061 case LABEL_REF:
5062 case SYMBOL_REF:
5063 return 6;
5065 case CONST_DOUBLE:
5066 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5067 return outer == SET ? 2 : -1;
5068 else if ((outer == COMPARE || outer == PLUS)
5069 && neg_const_double_rtx_ok_for_fpa (x))
5070 return -1;
5071 return 7;
5073 default:
5074 return 99;
5078 /* RTX costs when optimizing for size. */
5079 static bool
5080 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5082 enum machine_mode mode = GET_MODE (x);
5084 if (TARGET_THUMB1)
5086 /* XXX TBD. For now, use the standard costs. */
5087 *total = thumb1_rtx_costs (x, code, outer_code);
5088 return true;
5091 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
5092 switch (code)
5094 case MEM:
5095 /* A memory access costs 1 insn if the mode is small, or the address is
5096 a single register; otherwise it costs one insn per word.
5097 if (REG_P (XEXP (x, 0)))
5098 *total = COSTS_N_INSNS (1);
5099 else
5100 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5101 return true;
5103 case DIV:
5104 case MOD:
5105 case UDIV:
5106 case UMOD:
5107 /* Needs a libcall, so it costs about this. */
5108 *total = COSTS_N_INSNS (2);
5109 return false;
5111 case ROTATE:
5112 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5114 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5115 return true;
5117 /* Fall through */
5118 case ROTATERT:
5119 case ASHIFT:
5120 case LSHIFTRT:
5121 case ASHIFTRT:
5122 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5124 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5125 return true;
5127 else if (mode == SImode)
5129 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5130 /* Slightly disparage register shifts, but not by much. */
5131 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5132 *total += 1 + rtx_cost (XEXP (x, 1), code);
5133 return true;
5136 /* Needs a libcall. */
5137 *total = COSTS_N_INSNS (2);
5138 return false;
5140 case MINUS:
5141 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5143 *total = COSTS_N_INSNS (1);
5144 return false;
5147 if (mode == SImode)
5149 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5150 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5152 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5153 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5154 || subcode1 == ROTATE || subcode1 == ROTATERT
5155 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5156 || subcode1 == ASHIFTRT)
5158 /* It's just the cost of the two operands. */
5159 *total = 0;
5160 return false;
5163 *total = COSTS_N_INSNS (1);
5164 return false;
5167 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5168 return false;
5170 case PLUS:
5171 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5173 *total = COSTS_N_INSNS (1);
5174 return false;
5177 /* Fall through */
5178 case AND: case XOR: case IOR:
5179 if (mode == SImode)
5181 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5183 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5184 || subcode == LSHIFTRT || subcode == ASHIFTRT
5185 || (code == AND && subcode == NOT))
5187 /* It's just the cost of the two operands. */
5188 *total = 0;
5189 return false;
5193 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5194 return false;
5196 case MULT:
5197 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5198 return false;
5200 case NEG:
5201 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5202 *total = COSTS_N_INSNS (1);
5203 /* Fall through */
5204 case NOT:
5205 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5207 return false;
5209 case IF_THEN_ELSE:
5210 *total = 0;
5211 return false;
5213 case COMPARE:
5214 if (cc_register (XEXP (x, 0), VOIDmode))
5215 *total = 0;
5216 else
5217 *total = COSTS_N_INSNS (1);
5218 return false;
5220 case ABS:
5221 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5222 *total = COSTS_N_INSNS (1);
5223 else
5224 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5225 return false;
5227 case SIGN_EXTEND:
5228 *total = 0;
5229 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5231 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5232 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5234 if (mode == DImode)
5235 *total += COSTS_N_INSNS (1);
5236 return false;
5238 case ZERO_EXTEND:
5239 *total = 0;
5240 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5242 switch (GET_MODE (XEXP (x, 0)))
5244 case QImode:
5245 *total += COSTS_N_INSNS (1);
5246 break;
5248 case HImode:
5249 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5251 case SImode:
5252 break;
5254 default:
5255 *total += COSTS_N_INSNS (2);
5259 if (mode == DImode)
5260 *total += COSTS_N_INSNS (1);
5262 return false;
5264 case CONST_INT:
5265 if (const_ok_for_arm (INTVAL (x)))
5266 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5267 else if (const_ok_for_arm (~INTVAL (x)))
5268 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5269 else if (const_ok_for_arm (-INTVAL (x)))
5271 if (outer_code == COMPARE || outer_code == PLUS
5272 || outer_code == MINUS)
5273 *total = 0;
5274 else
5275 *total = COSTS_N_INSNS (1);
5277 else
5278 *total = COSTS_N_INSNS (2);
5279 return true;
5281 case CONST:
5282 case LABEL_REF:
5283 case SYMBOL_REF:
5284 *total = COSTS_N_INSNS (2);
5285 return true;
5287 case CONST_DOUBLE:
5288 *total = COSTS_N_INSNS (4);
5289 return true;
5291 default:
5292 if (mode != VOIDmode)
5293 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5294 else
5295 *total = COSTS_N_INSNS (4); /* Who knows? */
5296 return false;
5300 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5301 supported on any "slowmul" cores, so it can be ignored. */
5303 static bool
5304 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5306 enum machine_mode mode = GET_MODE (x);
5308 if (TARGET_THUMB)
5310 *total = thumb1_rtx_costs (x, code, outer_code);
5311 return true;
5314 switch (code)
5316 case MULT:
5317 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5318 || mode == DImode)
5320 *total = 30;
5321 return true;
5324 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5326 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5327 & (unsigned HOST_WIDE_INT) 0xffffffff);
5328 int cost, const_ok = const_ok_for_arm (i);
5329 int j, booth_unit_size;
5331 /* Tune as appropriate. */
5332 cost = const_ok ? 4 : 8;
5333 booth_unit_size = 2;
5334 for (j = 0; i && j < 32; j += booth_unit_size)
5336 i >>= booth_unit_size;
5337 cost += 2;
5340 *total = cost;
5341 return true;
5344 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5345 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5346 return true;
5348 default:
5349 *total = arm_rtx_costs_1 (x, code, outer_code);
5350 return true;
5355 /* RTX cost for cores with a fast multiply unit (M variants). */
5357 static bool
5358 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5360 enum machine_mode mode = GET_MODE (x);
5362 if (TARGET_THUMB1)
5364 *total = thumb1_rtx_costs (x, code, outer_code);
5365 return true;
5368 /* ??? should thumb2 use different costs? */
5369 switch (code)
5371 case MULT:
5372 /* There is no point basing this on the tuning, since it is always the
5373 fast variant if it exists at all. */
5374 if (mode == DImode
5375 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5376 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5377 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5379 *total = 8;
5380 return true;
5384 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5385 || mode == DImode)
5387 *total = 30;
5388 return true;
5391 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5393 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5394 & (unsigned HOST_WIDE_INT) 0xffffffff);
5395 int cost, const_ok = const_ok_for_arm (i);
5396 int j, booth_unit_size;
5398 /* Tune as appropriate. */
5399 cost = const_ok ? 4 : 8;
5400 booth_unit_size = 8;
5401 for (j = 0; i && j < 32; j += booth_unit_size)
5403 i >>= booth_unit_size;
5404 cost += 2;
5407 *total = cost;
5408 return true;
5411 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5412 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5413 return true;
5415 default:
5416 *total = arm_rtx_costs_1 (x, code, outer_code);
5417 return true;
5422 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5423 so it can be ignored. */
5425 static bool
5426 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5428 enum machine_mode mode = GET_MODE (x);
5430 if (TARGET_THUMB)
5432 *total = thumb1_rtx_costs (x, code, outer_code);
5433 return true;
5436 switch (code)
5438 case MULT:
5439 /* There is no point basing this on the tuning, since it is always the
5440 fast variant if it exists at all. */
5441 if (mode == DImode
5442 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5443 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5444 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5446 *total = 8;
5447 return true;
5451 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5452 || mode == DImode)
5454 *total = 30;
5455 return true;
5458 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5460 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5461 & (unsigned HOST_WIDE_INT) 0xffffffff);
5462 int cost, const_ok = const_ok_for_arm (i);
5463 unsigned HOST_WIDE_INT masked_const;
5465 /* The cost will be related to two insns.
5466 First a load of the constant (MOV or LDR), then a multiply. */
5467 cost = 2;
5468 if (! const_ok)
5469 cost += 1; /* LDR is probably more expensive because
5470 of longer result latency. */
5471 masked_const = i & 0xffff8000;
5472 if (masked_const != 0 && masked_const != 0xffff8000)
5474 masked_const = i & 0xf8000000;
5475 if (masked_const == 0 || masked_const == 0xf8000000)
5476 cost += 1;
5477 else
5478 cost += 2;
5480 *total = cost;
5481 return true;
5484 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5485 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5486 return true;
5488 case COMPARE:
5489 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5490 will stall until the multiplication is complete. */
5491 if (GET_CODE (XEXP (x, 0)) == MULT)
5492 *total = 4 + rtx_cost (XEXP (x, 0), code);
5493 else
5494 *total = arm_rtx_costs_1 (x, code, outer_code);
5495 return true;
5497 default:
5498 *total = arm_rtx_costs_1 (x, code, outer_code);
5499 return true;
5504 /* RTX costs for 9e (and later) cores. */
5506 static bool
5507 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5509 enum machine_mode mode = GET_MODE (x);
5510 int nonreg_cost;
5511 int cost;
5513 if (TARGET_THUMB1)
5515 switch (code)
5517 case MULT:
5518 *total = COSTS_N_INSNS (3);
5519 return true;
5521 default:
5522 *total = thumb1_rtx_costs (x, code, outer_code);
5523 return true;
5527 switch (code)
5529 case MULT:
5530 /* There is no point basing this on the tuning, since it is always the
5531 fast variant if it exists at all. */
5532 if (mode == DImode
5533 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5534 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5535 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5537 *total = 3;
5538 return true;
5542 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5544 *total = 30;
5545 return true;
5547 if (mode == DImode)
5549 cost = 7;
5550 nonreg_cost = 8;
5552 else
5554 cost = 2;
5555 nonreg_cost = 4;
5559 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5560 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5561 return true;
5563 default:
5564 *total = arm_rtx_costs_1 (x, code, outer_code);
5565 return true;
5568 /* All address computations that can be done are free, but rtx cost returns
5569 the same for practically all of them. So we weight the different types
5570 of address here in the order (most pref first):
5571 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
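/* E.g. a post-increment address costs 0, a register plus a shifted register
   3, a register plus a register 4, a plain register 6, and a constant-pool
   symbol 10.  */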
5572 static inline int
5573 arm_arm_address_cost (rtx x)
5575 enum rtx_code c = GET_CODE (x);
5577 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5578 return 0;
5579 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5580 return 10;
5582 if (c == PLUS || c == MINUS)
5584 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5585 return 2;
5587 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5588 return 3;
5590 return 4;
5593 return 6;
5596 static inline int
5597 arm_thumb_address_cost (rtx x)
5599 enum rtx_code c = GET_CODE (x);
5601 if (c == REG)
5602 return 1;
5603 if (c == PLUS
5604 && GET_CODE (XEXP (x, 0)) == REG
5605 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5606 return 1;
5608 return 2;
5611 static int
5612 arm_address_cost (rtx x)
5614 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5617 static int
5618 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5620 rtx i_pat, d_pat;
5622 /* Some true dependencies can have a higher cost depending
5623 on precisely how certain input operands are used. */
5624 if (arm_tune_xscale
5625 && REG_NOTE_KIND (link) == 0
5626 && recog_memoized (insn) >= 0
5627 && recog_memoized (dep) >= 0)
5629 int shift_opnum = get_attr_shift (insn);
5630 enum attr_type attr_type = get_attr_type (dep);
5632 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5633 operand for INSN. If we have a shifted input operand and the
5634 instruction we depend on is another ALU instruction, then we may
5635 have to account for an additional stall. */
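/* For example, on XScale an ALU result feeding the shifted operand of the
   following instruction, as in "add r1, r2, r3" followed by
   "add r0, r4, r1, lsl #2", may stall for an extra cycle; the value of 2
   returned below accounts for this.  */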
5636 if (shift_opnum != 0
5637 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5639 rtx shifted_operand;
5640 int opno;
5642 /* Get the shifted operand. */
5643 extract_insn (insn);
5644 shifted_operand = recog_data.operand[shift_opnum];
5646 /* Iterate over all the operands in DEP. If we write an operand
5647 that overlaps with SHIFTED_OPERAND, then we have to increase the
5648 cost of this dependency. */
5649 extract_insn (dep);
5650 preprocess_constraints ();
5651 for (opno = 0; opno < recog_data.n_operands; opno++)
5653 /* We can ignore strict inputs. */
5654 if (recog_data.operand_type[opno] == OP_IN)
5655 continue;
5657 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5658 shifted_operand))
5659 return 2;
5664 /* XXX This is not strictly true for the FPA. */
5665 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5666 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5667 return 0;
5669 /* Call insns don't incur a stall, even if they follow a load. */
5670 if (REG_NOTE_KIND (link) == 0
5671 && GET_CODE (insn) == CALL_INSN)
5672 return 1;
5674 if ((i_pat = single_set (insn)) != NULL
5675 && GET_CODE (SET_SRC (i_pat)) == MEM
5676 && (d_pat = single_set (dep)) != NULL
5677 && GET_CODE (SET_DEST (d_pat)) == MEM)
5679 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5680 /* This is a load after a store; there is no conflict if the load reads
5681 from a cached area. Assume that loads from the stack and from the
5682 constant pool are cached, and that others will miss. This is a
5683 hack. */
5685 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5686 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5687 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5688 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5689 return 1;
5692 return cost;
5695 static int fp_consts_inited = 0;
5697 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5698 static const char * const strings_fp[8] =
5700 "0", "1", "2", "3",
5701 "4", "5", "0.5", "10"
5704 static REAL_VALUE_TYPE values_fp[8];
5706 static void
5707 init_fp_table (void)
5709 int i;
5710 REAL_VALUE_TYPE r;
5712 if (TARGET_VFP)
5713 fp_consts_inited = 1;
5714 else
5715 fp_consts_inited = 8;
5717 for (i = 0; i < fp_consts_inited; i++)
5719 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5720 values_fp[i] = r;
5724 /* Return TRUE if rtx X is a valid immediate FP constant. */
5726 arm_const_double_rtx (rtx x)
5728 REAL_VALUE_TYPE r;
5729 int i;
5731 if (!fp_consts_inited)
5732 init_fp_table ();
5734 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5735 if (REAL_VALUE_MINUS_ZERO (r))
5736 return 0;
5738 for (i = 0; i < fp_consts_inited; i++)
5739 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5740 return 1;
5742 return 0;
5745 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5747 neg_const_double_rtx_ok_for_fpa (rtx x)
5749 REAL_VALUE_TYPE r;
5750 int i;
5752 if (!fp_consts_inited)
5753 init_fp_table ();
5755 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5756 r = REAL_VALUE_NEGATE (r);
5757 if (REAL_VALUE_MINUS_ZERO (r))
5758 return 0;
5760 for (i = 0; i < 8; i++)
5761 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5762 return 1;
5764 return 0;
5768 /* VFPv3 has a fairly wide range of representable immediates, formed from
5769 "quarter-precision" floating-point values. These can be evaluated using this
5770 formula (with ^ for exponentiation):
5772 (-1)^s * n * 2^-r
5774 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5775 16 <= n <= 31 and 0 <= r <= 7.
5777 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5779 - A (most-significant) is the sign bit.
5780 - BCD are the exponent (encoded as r XOR 3).
5781 - EFGH are the mantissa (encoded as n - 16).
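   As a worked example, 1.0 = 16 * 2^-4 (s = 0, n = 16, r = 4) encodes as
   (4 ^ 3) << 4 = 0x70, and 1.5 = 24 * 2^-4 encodes as 0x78; the index is
   built as (s << 7) | ((r ^ 3) << 4) | (n - 16), which is what
   vfp3_const_double_index below returns after recovering s, n and r from
   the REAL_VALUE_TYPE.  */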
5784 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5785 fconst[sd] instruction, or -1 if X isn't suitable. */
5786 static int
5787 vfp3_const_double_index (rtx x)
5789 REAL_VALUE_TYPE r, m;
5790 int sign, exponent;
5791 unsigned HOST_WIDE_INT mantissa, mant_hi;
5792 unsigned HOST_WIDE_INT mask;
5793 HOST_WIDE_INT m1, m2;
5794 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5796 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5797 return -1;
5799 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5801 /* We can't represent these things, so detect them first. */
5802 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5803 return -1;
5805 /* Extract sign, exponent and mantissa. */
5806 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5807 r = REAL_VALUE_ABS (r);
5808 exponent = REAL_EXP (&r);
5809 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5810 highest (sign) bit, with a fixed binary point at bit point_pos.
5811 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5812 bits for the mantissa, this may fail (low bits would be lost). */
5813 real_ldexp (&m, &r, point_pos - exponent);
5814 REAL_VALUE_TO_INT (&m1, &m2, m);
5815 mantissa = m1;
5816 mant_hi = m2;
5818 /* If there are bits set in the low part of the mantissa, we can't
5819 represent this value. */
5820 if (mantissa != 0)
5821 return -1;
5823 /* Now make it so that mantissa contains the most-significant bits, and move
5824 the point_pos to indicate that the least-significant bits have been
5825 discarded. */
5826 point_pos -= HOST_BITS_PER_WIDE_INT;
5827 mantissa = mant_hi;
5829 /* We can permit four significant bits of mantissa only, plus a high bit
5830 which is always 1. */
5831 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5832 if ((mantissa & mask) != 0)
5833 return -1;
5835 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5836 mantissa >>= point_pos - 5;
5838 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5839 floating-point immediate zero with Neon using an integer-zero load, but
5840 that case is handled elsewhere.) */
5841 if (mantissa == 0)
5842 return -1;
5844 gcc_assert (mantissa >= 16 && mantissa <= 31);
5846 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5847 normalized significands are in the range [1, 2). (Our mantissa is shifted
5848 left 4 places at this point relative to normalized IEEE754 values). GCC
5849 internally uses [0.5, 1) (see real.c), so the exponent returned from
5850 REAL_EXP must be altered. */
5851 exponent = 5 - exponent;
5853 if (exponent < 0 || exponent > 7)
5854 return -1;
5856 /* Sign, mantissa and exponent are now in the correct form to plug into the
5857 formula described in the comment above. */
5858 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5861 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5863 vfp3_const_double_rtx (rtx x)
5865 if (!TARGET_VFP3)
5866 return 0;
5868 return vfp3_const_double_index (x) != -1;
5871 /* Recognize immediates which can be used in various Neon instructions. Legal
5872 immediates are described by the following table (for VMVN variants, the
5873 bitwise inverse of the constant shown is recognized. In either case, VMOV
5874 is output and the correct instruction to use for a given constant is chosen
5875 by the assembler). The constant shown is replicated across all elements of
5876 the destination vector.
5878 insn elems variant constant (binary)
5879 ---- ----- ------- -----------------
5880 vmov i32 0 00000000 00000000 00000000 abcdefgh
5881 vmov i32 1 00000000 00000000 abcdefgh 00000000
5882 vmov i32 2 00000000 abcdefgh 00000000 00000000
5883 vmov i32 3 abcdefgh 00000000 00000000 00000000
5884 vmov i16 4 00000000 abcdefgh
5885 vmov i16 5 abcdefgh 00000000
5886 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5887 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5888 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5889 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5890 vmvn i16 10 00000000 abcdefgh
5891 vmvn i16 11 abcdefgh 00000000
5892 vmov i32 12 00000000 00000000 abcdefgh 11111111
5893 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5894 vmov i32 14 00000000 abcdefgh 11111111 11111111
5895 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5896 vmov i8 16 abcdefgh
5897 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5898 eeeeeeee ffffffff gggggggg hhhhhhhh
5899 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5901 For case 18, B = !b. Representable values are exactly those accepted by
5902 vfp3_const_double_index, but are output as floating-point numbers rather
5903 than indices.
5905 Variants 0-5 (inclusive) may also be used as immediates for the second
5906 operand of VORR/VBIC instructions.
5908 The INVERSE argument causes the bitwise inverse of the given operand to be
5909 recognized instead (used for recognizing legal immediates for the VAND/VORN
5910 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5911 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5912 output, rather than the real insns vbic/vorr).
5914 INVERSE makes no difference to the recognition of float vectors.
5916 The return value is the variant of immediate as shown in the above table, or
5917 -1 if the given value doesn't match any of the listed patterns.
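/* Worked example (added for illustration, not part of the original
   comment): a V4SI constant with every element equal to 0x0000ab00
   matches variant 1 above, so the function returns 1, sets
   *ELEMENTWIDTH to 32 and *MODCONST to the 32-bit value 0x0000ab00.  */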
5919 static int
5920 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5921 rtx *modconst, int *elementwidth)
5923 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5924 matches = 1; \
5925 for (i = 0; i < idx; i += (STRIDE)) \
5926 if (!(TEST)) \
5927 matches = 0; \
5928 if (matches) \
5930 immtype = (CLASS); \
5931 elsize = (ELSIZE); \
5932 break; \
5935 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5936 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5937 unsigned char bytes[16];
5938 int immtype = -1, matches;
5939 unsigned int invmask = inverse ? 0xff : 0;
5941 /* Vectors of float constants. */
5942 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5944 rtx el0 = CONST_VECTOR_ELT (op, 0);
5945 REAL_VALUE_TYPE r0;
5947 if (!vfp3_const_double_rtx (el0))
5948 return -1;
5950 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5952 for (i = 1; i < n_elts; i++)
5954 rtx elt = CONST_VECTOR_ELT (op, i);
5955 REAL_VALUE_TYPE re;
5957 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5959 if (!REAL_VALUES_EQUAL (r0, re))
5960 return -1;
5963 if (modconst)
5964 *modconst = CONST_VECTOR_ELT (op, 0);
5966 if (elementwidth)
5967 *elementwidth = 0;
5969 return 18;
5972 /* Splat vector constant out into a byte vector. */
5973 for (i = 0; i < n_elts; i++)
5975 rtx el = CONST_VECTOR_ELT (op, i);
5976 unsigned HOST_WIDE_INT elpart;
5977 unsigned int part, parts;
5979 if (GET_CODE (el) == CONST_INT)
5981 elpart = INTVAL (el);
5982 parts = 1;
5984 else if (GET_CODE (el) == CONST_DOUBLE)
5986 elpart = CONST_DOUBLE_LOW (el);
5987 parts = 2;
5989 else
5990 gcc_unreachable ();
5992 for (part = 0; part < parts; part++)
5994 unsigned int byte;
5995 for (byte = 0; byte < innersize; byte++)
5997 bytes[idx++] = (elpart & 0xff) ^ invmask;
5998 elpart >>= BITS_PER_UNIT;
6000 if (GET_CODE (el) == CONST_DOUBLE)
6001 elpart = CONST_DOUBLE_HIGH (el);
6005 /* Sanity check. */
6006 gcc_assert (idx == GET_MODE_SIZE (mode));
6010 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6011 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6013 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6014 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6016 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6017 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6019 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6020 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6022 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6024 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6026 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6027 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6029 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6030 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6032 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6033 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6035 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6036 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6038 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6040 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6042 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6043 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6045 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6046 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6048 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6049 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6051 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6052 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6054 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6056 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6057 && bytes[i] == bytes[(i + 8) % idx]);
6059 while (0);
6061 if (immtype == -1)
6062 return -1;
6064 if (elementwidth)
6065 *elementwidth = elsize;
6067 if (modconst)
6069 unsigned HOST_WIDE_INT imm = 0;
6071 /* Un-invert bytes of recognized vector, if necessary. */
6072 if (invmask != 0)
6073 for (i = 0; i < idx; i++)
6074 bytes[i] ^= invmask;
6076 if (immtype == 17)
6078 /* FIXME: Broken on 32-bit H_W_I hosts. */
6079 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6081 for (i = 0; i < 8; i++)
6082 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6083 << (i * BITS_PER_UNIT);
6085 *modconst = GEN_INT (imm);
6087 else
6089 unsigned HOST_WIDE_INT imm = 0;
6091 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6092 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6094 *modconst = GEN_INT (imm);
6098 return immtype;
6099 #undef CHECK
6102 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6103 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6104 float elements), and a modified constant (whatever should be output for a
6105 VMOV) in *MODCONST. */
6108 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6109 rtx *modconst, int *elementwidth)
6111 rtx tmpconst;
6112 int tmpwidth;
6113 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6115 if (retval == -1)
6116 return 0;
6118 if (modconst)
6119 *modconst = tmpconst;
6121 if (elementwidth)
6122 *elementwidth = tmpwidth;
6124 return 1;
6127 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6128 the immediate is valid, write a constant suitable for using as an operand
6129 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6130 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6133 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6134 rtx *modconst, int *elementwidth)
6136 rtx tmpconst;
6137 int tmpwidth;
6138 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6140 if (retval < 0 || retval > 5)
6141 return 0;
6143 if (modconst)
6144 *modconst = tmpconst;
6146 if (elementwidth)
6147 *elementwidth = tmpwidth;
6149 return 1;
6152 /* Return a string suitable for output of Neon immediate logic operation
6153 MNEM. */
6155 char *
6156 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6157 int inverse, int quad)
6159 int width, is_valid;
6160 static char templ[40];
6162 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6164 gcc_assert (is_valid != 0);
6166 if (quad)
6167 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6168 else
6169 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6171 return templ;
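/* For example (illustrative), calling this with MNEM "vorr", a valid
   32-bit-element immediate and QUAD nonzero yields the template string
   "vorr.i32\t%q0, %2".  */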
6174 /* Output a sequence of pairwise operations to implement a reduction.
6175 NOTE: We do "too much work" here, because pairwise operations work on two
6176 registers' worth of operands in one go.  Unfortunately we can't exploit those
6177 extra calculations to do the full operation in fewer steps, as far as I can tell.
6178 Although all vector elements of the result but the first are ignored, we
6179 actually calculate the same result in each of the elements. An alternative
6180 such as initially loading a vector with zero to use as each of the second
6181 operands would use up an additional register and take an extra instruction,
6182 for no particular gain. */
6184 void
6185 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6186 rtx (*reduc) (rtx, rtx, rtx))
6188 enum machine_mode inner = GET_MODE_INNER (mode);
6189 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6190 rtx tmpsum = op1;
6192 for (i = parts / 2; i >= 1; i /= 2)
6194 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6195 emit_insn (reduc (dest, tmpsum, tmpsum));
6196 tmpsum = dest;
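/* Illustration (not in the original source): for a four-element vector
   the loop runs with i = 2 and then i = 1, emitting two pairwise
   operations; after the second one every element of OP0 holds the full
   reduction, although callers only use the first element.  */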
6200 /* Initialize a vector with non-constant elements. FIXME: We can do better
6201 than the current implementation (building a vector on the stack and then
6202 loading it) in many cases. See rs6000.c. */
6204 void
6205 neon_expand_vector_init (rtx target, rtx vals)
6207 enum machine_mode mode = GET_MODE (target);
6208 enum machine_mode inner = GET_MODE_INNER (mode);
6209 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6210 rtx mem;
6212 gcc_assert (VECTOR_MODE_P (mode));
6214 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6215 for (i = 0; i < n_elts; i++)
6216 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6217 XVECEXP (vals, 0, i));
6219 emit_move_insn (target, mem);
6222 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6223 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6224 reported source locations are bogus. */
6226 static void
6227 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6228 const char *err)
6230 HOST_WIDE_INT lane;
6232 gcc_assert (GET_CODE (operand) == CONST_INT);
6234 lane = INTVAL (operand);
6236 if (lane < low || lane >= high)
6237 error (err);
6240 /* Bounds-check lanes. */
6242 void
6243 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6245 bounds_check (operand, low, high, "lane out of range");
6248 /* Bounds-check constants. */
6250 void
6251 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6253 bounds_check (operand, low, high, "constant out of range");
6256 HOST_WIDE_INT
6257 neon_element_bits (enum machine_mode mode)
6259 if (mode == DImode)
6260 return GET_MODE_BITSIZE (mode);
6261 else
6262 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6266 /* Predicates for `match_operand' and `match_operator'. */
6268 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6270 cirrus_memory_offset (rtx op)
6272 /* Reject eliminable registers. */
6273 if (! (reload_in_progress || reload_completed)
6274 && ( reg_mentioned_p (frame_pointer_rtx, op)
6275 || reg_mentioned_p (arg_pointer_rtx, op)
6276 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6277 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6278 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6279 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6280 return 0;
6282 if (GET_CODE (op) == MEM)
6284 rtx ind;
6286 ind = XEXP (op, 0);
6288 /* Match: (mem (reg)). */
6289 if (GET_CODE (ind) == REG)
6290 return 1;
6292 /* Match:
6293 (mem (plus (reg)
6294 (const))). */
6295 if (GET_CODE (ind) == PLUS
6296 && GET_CODE (XEXP (ind, 0)) == REG
6297 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6298 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6299 return 1;
6302 return 0;
6305 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6306 WB is true if full writeback address modes are allowed and is false
6307 if limited writeback address modes (POST_INC and PRE_DEC) are
6308 allowed. */
6311 arm_coproc_mem_operand (rtx op, bool wb)
6313 rtx ind;
6315 /* Reject eliminable registers. */
6316 if (! (reload_in_progress || reload_completed)
6317 && ( reg_mentioned_p (frame_pointer_rtx, op)
6318 || reg_mentioned_p (arg_pointer_rtx, op)
6319 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6320 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6321 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6322 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6323 return FALSE;
6325 /* Constants are converted into offsets from labels. */
6326 if (GET_CODE (op) != MEM)
6327 return FALSE;
6329 ind = XEXP (op, 0);
6331 if (reload_completed
6332 && (GET_CODE (ind) == LABEL_REF
6333 || (GET_CODE (ind) == CONST
6334 && GET_CODE (XEXP (ind, 0)) == PLUS
6335 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6336 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6337 return TRUE;
6339 /* Match: (mem (reg)). */
6340 if (GET_CODE (ind) == REG)
6341 return arm_address_register_rtx_p (ind, 0);
6343 Auto-increment addressing modes. POST_INC and PRE_DEC are
6344 acceptable in any case (subject to verification by
6345 arm_address_register_rtx_p). We need WB to be true to accept
6346 PRE_INC and POST_DEC. */
6347 if (GET_CODE (ind) == POST_INC
6348 || GET_CODE (ind) == PRE_DEC
6349 || (wb
6350 && (GET_CODE (ind) == PRE_INC
6351 || GET_CODE (ind) == POST_DEC)))
6352 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6354 if (wb
6355 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6356 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6357 && GET_CODE (XEXP (ind, 1)) == PLUS
6358 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6359 ind = XEXP (ind, 1);
6361 /* Match:
6362 (plus (reg)
6363 (const)). */
6364 if (GET_CODE (ind) == PLUS
6365 && GET_CODE (XEXP (ind, 0)) == REG
6366 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6367 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6368 && INTVAL (XEXP (ind, 1)) > -1024
6369 && INTVAL (XEXP (ind, 1)) < 1024
6370 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6371 return TRUE;
6373 return FALSE;
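/* Example (illustrative): (mem (plus r2 (const_int 8))) is accepted,
   since the offset is a multiple of 4 and lies strictly between -1024
   and 1024, whereas an offset of 1024 or one that is not word-aligned
   is rejected.  */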
6376 /* Return TRUE if OP is a memory operand which we can load or store a vector
6377 to/from. If CORE is true, we're moving from ARM registers not Neon
6378 registers. */
6380 neon_vector_mem_operand (rtx op, bool core)
6382 rtx ind;
6384 /* Reject eliminable registers. */
6385 if (! (reload_in_progress || reload_completed)
6386 && ( reg_mentioned_p (frame_pointer_rtx, op)
6387 || reg_mentioned_p (arg_pointer_rtx, op)
6388 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6389 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6390 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6391 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6392 return FALSE;
6394 /* Constants are converted into offsets from labels. */
6395 if (GET_CODE (op) != MEM)
6396 return FALSE;
6398 ind = XEXP (op, 0);
6400 if (reload_completed
6401 && (GET_CODE (ind) == LABEL_REF
6402 || (GET_CODE (ind) == CONST
6403 && GET_CODE (XEXP (ind, 0)) == PLUS
6404 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6405 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6406 return TRUE;
6408 /* Match: (mem (reg)). */
6409 if (GET_CODE (ind) == REG)
6410 return arm_address_register_rtx_p (ind, 0);
6412 /* Allow post-increment with Neon registers. */
6413 if (!core && GET_CODE (ind) == POST_INC)
6414 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6416 #if 0
6417 /* FIXME: We can support this too if we use VLD1/VST1. */
6418 if (!core
6419 && GET_CODE (ind) == POST_MODIFY
6420 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6421 && GET_CODE (XEXP (ind, 1)) == PLUS
6422 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6423 ind = XEXP (ind, 1);
6424 #endif
6426 /* Match:
6427 (plus (reg)
6428 (const)). */
6429 if (!core
6430 && GET_CODE (ind) == PLUS
6431 && GET_CODE (XEXP (ind, 0)) == REG
6432 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6433 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6434 && INTVAL (XEXP (ind, 1)) > -1024
6435 && INTVAL (XEXP (ind, 1)) < 1016
6436 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6437 return TRUE;
6439 return FALSE;
6442 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6443 type. */
6445 neon_struct_mem_operand (rtx op)
6447 rtx ind;
6449 /* Reject eliminable registers. */
6450 if (! (reload_in_progress || reload_completed)
6451 && ( reg_mentioned_p (frame_pointer_rtx, op)
6452 || reg_mentioned_p (arg_pointer_rtx, op)
6453 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6454 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6455 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6456 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6457 return FALSE;
6459 /* Constants are converted into offsets from labels. */
6460 if (GET_CODE (op) != MEM)
6461 return FALSE;
6463 ind = XEXP (op, 0);
6465 if (reload_completed
6466 && (GET_CODE (ind) == LABEL_REF
6467 || (GET_CODE (ind) == CONST
6468 && GET_CODE (XEXP (ind, 0)) == PLUS
6469 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6470 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6471 return TRUE;
6473 /* Match: (mem (reg)). */
6474 if (GET_CODE (ind) == REG)
6475 return arm_address_register_rtx_p (ind, 0);
6477 return FALSE;
6480 /* Return true if X is a register that will be eliminated later on. */
6482 arm_eliminable_register (rtx x)
6484 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6485 || REGNO (x) == ARG_POINTER_REGNUM
6486 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6487 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6490 /* Return GENERAL_REGS if a scratch register required to reload x to/from
6491 coprocessor registers. Otherwise return NO_REGS. */
6493 enum reg_class
6494 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6496 if (TARGET_NEON
6497 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6498 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6499 && neon_vector_mem_operand (x, FALSE))
6500 return NO_REGS;
6502 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6503 return NO_REGS;
6505 return GENERAL_REGS;
6508 /* Values which must be returned in the most-significant end of the return
6509 register. */
6511 static bool
6512 arm_return_in_msb (const_tree valtype)
6514 return (TARGET_AAPCS_BASED
6515 && BYTES_BIG_ENDIAN
6516 && (AGGREGATE_TYPE_P (valtype)
6517 || TREE_CODE (valtype) == COMPLEX_TYPE));
6520 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6521 Used by the Cirrus Maverick code which has to work around
6522 a hardware bug triggered by such instructions. */
6523 static bool
6524 arm_memory_load_p (rtx insn)
6526 rtx body, lhs, rhs;
6528 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6529 return false;
6531 body = PATTERN (insn);
6533 if (GET_CODE (body) != SET)
6534 return false;
6536 lhs = XEXP (body, 0);
6537 rhs = XEXP (body, 1);
6539 lhs = REG_OR_SUBREG_RTX (lhs);
6541 /* If the destination is not a general purpose
6542 register we do not have to worry. */
6543 if (GET_CODE (lhs) != REG
6544 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6545 return false;
6547 /* As well as loads from memory we also have to react
6548 to loads of invalid constants which will be turned
6549 into loads from the minipool. */
6550 return (GET_CODE (rhs) == MEM
6551 || GET_CODE (rhs) == SYMBOL_REF
6552 || note_invalid_constants (insn, -1, false));
6555 /* Return TRUE if INSN is a Cirrus instruction. */
6556 static bool
6557 arm_cirrus_insn_p (rtx insn)
6559 enum attr_cirrus attr;
6561 /* get_attr cannot accept USE or CLOBBER. */
6562 if (!insn
6563 || GET_CODE (insn) != INSN
6564 || GET_CODE (PATTERN (insn)) == USE
6565 || GET_CODE (PATTERN (insn)) == CLOBBER)
6566 return 0;
6568 attr = get_attr_cirrus (insn);
6570 return attr != CIRRUS_NOT;
6573 /* Cirrus reorg for invalid instruction combinations. */
6574 static void
6575 cirrus_reorg (rtx first)
6577 enum attr_cirrus attr;
6578 rtx body = PATTERN (first);
6579 rtx t;
6580 int nops;
6582 /* Any branch must be followed by 2 non Cirrus instructions. */
6583 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6585 nops = 0;
6586 t = next_nonnote_insn (first);
6588 if (arm_cirrus_insn_p (t))
6589 ++ nops;
6591 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6592 ++ nops;
6594 while (nops --)
6595 emit_insn_after (gen_nop (), first);
6597 return;
6600 /* (float (blah)) is in parallel with a clobber. */
6601 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6602 body = XVECEXP (body, 0, 0);
6604 if (GET_CODE (body) == SET)
6606 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6608 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6609 be followed by a non Cirrus insn. */
6610 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6612 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6613 emit_insn_after (gen_nop (), first);
6615 return;
6617 else if (arm_memory_load_p (first))
6619 unsigned int arm_regno;
6621 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6622 ldr/cfmv64hr combination where the Rd field is the same
6623 in both instructions must be split with a non Cirrus
6624 insn. Example:
6626 ldr r0, blah
6628 cfmvsr mvf0, r0. */
6630 /* Get Arm register number for ldr insn. */
6631 if (GET_CODE (lhs) == REG)
6632 arm_regno = REGNO (lhs);
6633 else
6635 gcc_assert (GET_CODE (rhs) == REG);
6636 arm_regno = REGNO (rhs);
6639 /* Next insn. */
6640 first = next_nonnote_insn (first);
6642 if (! arm_cirrus_insn_p (first))
6643 return;
6645 body = PATTERN (first);
6647 /* (float (blah)) is in parallel with a clobber. */
6648 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6649 body = XVECEXP (body, 0, 0);
6651 if (GET_CODE (body) == FLOAT)
6652 body = XEXP (body, 0);
6654 if (get_attr_cirrus (first) == CIRRUS_MOVE
6655 && GET_CODE (XEXP (body, 1)) == REG
6656 && arm_regno == REGNO (XEXP (body, 1)))
6657 emit_insn_after (gen_nop (), first);
6659 return;
6663 /* get_attr cannot accept USE or CLOBBER. */
6664 if (!first
6665 || GET_CODE (first) != INSN
6666 || GET_CODE (PATTERN (first)) == USE
6667 || GET_CODE (PATTERN (first)) == CLOBBER)
6668 return;
6670 attr = get_attr_cirrus (first);
6672 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6673 must be followed by a non-coprocessor instruction. */
6674 if (attr == CIRRUS_COMPARE)
6676 nops = 0;
6678 t = next_nonnote_insn (first);
6680 if (arm_cirrus_insn_p (t))
6681 ++ nops;
6683 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6684 ++ nops;
6686 while (nops --)
6687 emit_insn_after (gen_nop (), first);
6689 return;
6693 /* Return TRUE if X references a SYMBOL_REF. */
6695 symbol_mentioned_p (rtx x)
6697 const char * fmt;
6698 int i;
6700 if (GET_CODE (x) == SYMBOL_REF)
6701 return 1;
6703 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6704 are constant offsets, not symbols. */
6705 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6706 return 0;
6708 fmt = GET_RTX_FORMAT (GET_CODE (x));
6710 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6712 if (fmt[i] == 'E')
6714 int j;
6716 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6717 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6718 return 1;
6720 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6721 return 1;
6724 return 0;
6727 /* Return TRUE if X references a LABEL_REF. */
6729 label_mentioned_p (rtx x)
6731 const char * fmt;
6732 int i;
6734 if (GET_CODE (x) == LABEL_REF)
6735 return 1;
6737 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6738 instruction, but they are constant offsets, not symbols. */
6739 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6740 return 0;
6742 fmt = GET_RTX_FORMAT (GET_CODE (x));
6743 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6745 if (fmt[i] == 'E')
6747 int j;
6749 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6750 if (label_mentioned_p (XVECEXP (x, i, j)))
6751 return 1;
6753 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6754 return 1;
6757 return 0;
6761 tls_mentioned_p (rtx x)
6763 switch (GET_CODE (x))
6765 case CONST:
6766 return tls_mentioned_p (XEXP (x, 0));
6768 case UNSPEC:
6769 if (XINT (x, 1) == UNSPEC_TLS)
6770 return 1;
6772 default:
6773 return 0;
6777 /* Must not copy a SET whose source operand is PC-relative. */
6779 static bool
6780 arm_cannot_copy_insn_p (rtx insn)
6782 rtx pat = PATTERN (insn);
6784 if (GET_CODE (pat) == SET)
6786 rtx rhs = SET_SRC (pat);
6788 if (GET_CODE (rhs) == UNSPEC
6789 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6790 return TRUE;
6792 if (GET_CODE (rhs) == MEM
6793 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6794 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6795 return TRUE;
6798 return FALSE;
6801 enum rtx_code
6802 minmax_code (rtx x)
6804 enum rtx_code code = GET_CODE (x);
6806 switch (code)
6808 case SMAX:
6809 return GE;
6810 case SMIN:
6811 return LE;
6812 case UMIN:
6813 return LEU;
6814 case UMAX:
6815 return GEU;
6816 default:
6817 gcc_unreachable ();
6821 /* Return 1 if memory locations are adjacent. */
6823 adjacent_mem_locations (rtx a, rtx b)
6825 /* We don't guarantee to preserve the order of these memory refs. */
6826 if (volatile_refs_p (a) || volatile_refs_p (b))
6827 return 0;
6829 if ((GET_CODE (XEXP (a, 0)) == REG
6830 || (GET_CODE (XEXP (a, 0)) == PLUS
6831 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6832 && (GET_CODE (XEXP (b, 0)) == REG
6833 || (GET_CODE (XEXP (b, 0)) == PLUS
6834 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6836 HOST_WIDE_INT val0 = 0, val1 = 0;
6837 rtx reg0, reg1;
6838 int val_diff;
6840 if (GET_CODE (XEXP (a, 0)) == PLUS)
6842 reg0 = XEXP (XEXP (a, 0), 0);
6843 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6845 else
6846 reg0 = XEXP (a, 0);
6848 if (GET_CODE (XEXP (b, 0)) == PLUS)
6850 reg1 = XEXP (XEXP (b, 0), 0);
6851 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6853 else
6854 reg1 = XEXP (b, 0);
6856 /* Don't accept any offset that will require multiple
6857 instructions to handle, since this would cause the
6858 arith_adjacentmem pattern to output an overlong sequence. */
6859 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6860 return 0;
6862 /* Don't allow an eliminable register: register elimination can make
6863 the offset too large. */
6864 if (arm_eliminable_register (reg0))
6865 return 0;
6867 val_diff = val1 - val0;
6869 if (arm_ld_sched)
6871 /* If the target has load delay slots, then there's no benefit
6872 to using an ldm instruction unless the offset is zero and
6873 we are optimizing for size. */
6874 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6875 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6876 && (val_diff == 4 || val_diff == -4));
6879 return ((REGNO (reg0) == REGNO (reg1))
6880 && (val_diff == 4 || val_diff == -4));
6883 return 0;
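/* For instance (illustrative), without load delay slots (arm_ld_sched
   clear) the references [r4] and [r4, #4] are reported as adjacent --
   same base register, offsets differing by 4 -- whereas [r4] with
   [r5, #4], or [r4] with [r4, #8], are not.  */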
6887 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6888 HOST_WIDE_INT *load_offset)
6890 int unsorted_regs[4];
6891 HOST_WIDE_INT unsorted_offsets[4];
6892 int order[4];
6893 int base_reg = -1;
6894 int i;
6896 /* Can only handle 2, 3, or 4 insns at present,
6897 though could be easily extended if required. */
6898 gcc_assert (nops >= 2 && nops <= 4);
6900 /* Loop over the operands and check that the memory references are
6901 suitable (i.e. immediate offsets from the same base register). At
6902 the same time, extract the target register, and the memory
6903 offsets. */
6904 for (i = 0; i < nops; i++)
6906 rtx reg;
6907 rtx offset;
6909 /* Convert a subreg of a mem into the mem itself. */
6910 if (GET_CODE (operands[nops + i]) == SUBREG)
6911 operands[nops + i] = alter_subreg (operands + (nops + i));
6913 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6915 /* Don't reorder volatile memory references; it doesn't seem worth
6916 looking for the case where the order is ok anyway. */
6917 if (MEM_VOLATILE_P (operands[nops + i]))
6918 return 0;
6920 offset = const0_rtx;
6922 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6923 || (GET_CODE (reg) == SUBREG
6924 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6925 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6926 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6927 == REG)
6928 || (GET_CODE (reg) == SUBREG
6929 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6930 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6931 == CONST_INT)))
6933 if (i == 0)
6935 base_reg = REGNO (reg);
6936 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6937 ? REGNO (operands[i])
6938 : REGNO (SUBREG_REG (operands[i])));
6939 order[0] = 0;
6941 else
6943 if (base_reg != (int) REGNO (reg))
6944 /* Not addressed from the same base register. */
6945 return 0;
6947 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6948 ? REGNO (operands[i])
6949 : REGNO (SUBREG_REG (operands[i])));
6950 if (unsorted_regs[i] < unsorted_regs[order[0]])
6951 order[0] = i;
6954 /* If it isn't an integer register, or if it overwrites the
6955 base register but isn't the last insn in the list, then
6956 we can't do this. */
6957 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6958 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6959 return 0;
6961 unsorted_offsets[i] = INTVAL (offset);
6963 else
6964 /* Not a suitable memory address. */
6965 return 0;
6968 /* All the useful information has now been extracted from the
6969 operands into unsorted_regs and unsorted_offsets; additionally,
6970 order[0] has been set to the lowest numbered register in the
6971 list. Sort the registers into order, and check that the memory
6972 offsets are ascending and adjacent. */
6974 for (i = 1; i < nops; i++)
6976 int j;
6978 order[i] = order[i - 1];
6979 for (j = 0; j < nops; j++)
6980 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6981 && (order[i] == order[i - 1]
6982 || unsorted_regs[j] < unsorted_regs[order[i]]))
6983 order[i] = j;
6985 /* Have we found a suitable register? If not, one must be used more
6986 than once. */
6987 if (order[i] == order[i - 1])
6988 return 0;
6990 /* Is the memory address adjacent and ascending? */
6991 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6992 return 0;
6995 if (base)
6997 *base = base_reg;
6999 for (i = 0; i < nops; i++)
7000 regs[i] = unsorted_regs[order[i]];
7002 *load_offset = unsorted_offsets[order[0]];
7005 if (unsorted_offsets[order[0]] == 0)
7006 return 1; /* ldmia */
7008 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
7009 return 2; /* ldmib */
7011 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
7012 return 3; /* ldmda */
7014 if (unsorted_offsets[order[nops - 1]] == -4)
7015 return 4; /* ldmdb */
7017 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7018 if the offset isn't small enough. The reason 2 ldrs are faster
7019 is because these ARMs are able to do more than one cache access
7020 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7021 whilst the ARM8 has a double bandwidth cache. This means that
7022 these cores can do both an instruction fetch and a data fetch in
7023 a single cycle, so the trick of calculating the address into a
7024 scratch register (one of the result regs) and then doing a load
7025 multiple actually becomes slower (and no smaller in code size).
7026 That is the transformation
7028 ldr rd1, [rbase + offset]
7029 ldr rd2, [rbase + offset + 4]
7033 add rd1, rbase, offset
7034 ldmia rd1, {rd1, rd2}
7036 produces worse code -- '3 cycles + any stalls on rd2' instead of
7037 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7038 access per cycle, the first sequence could never complete in less
7039 than 6 cycles, whereas the ldm sequence would only take 5 and
7040 would make better use of sequential accesses if not hitting the
7041 cache.
7043 We cheat here and test 'arm_ld_sched' which we currently know to
7044 only be true for the ARM8, ARM9 and StrongARM. If this ever
7045 changes, then the test below needs to be reworked. */
7046 if (nops == 2 && arm_ld_sched)
7047 return 0;
7049 /* Can't do it without setting up the offset, only do this if it takes
7050 no more than one insn. */
7051 return (const_ok_for_arm (unsorted_offsets[order[0]])
7052 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
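/* Usage sketch (added for illustration): for two loads of r0 and r1
   from [r4] and [r4, #4] this returns 1 (ldmia), fills REGS with
   {0, 1} and sets *LOAD_OFFSET to 0; offsets of 4 and 8 from the same
   base would instead give 2 (ldmib) on ARM.  */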
7055 const char *
7056 emit_ldm_seq (rtx *operands, int nops)
7058 int regs[4];
7059 int base_reg;
7060 HOST_WIDE_INT offset;
7061 char buf[100];
7062 int i;
7064 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7066 case 1:
7067 strcpy (buf, "ldm%(ia%)\t");
7068 break;
7070 case 2:
7071 strcpy (buf, "ldm%(ib%)\t");
7072 break;
7074 case 3:
7075 strcpy (buf, "ldm%(da%)\t");
7076 break;
7078 case 4:
7079 strcpy (buf, "ldm%(db%)\t");
7080 break;
7082 case 5:
7083 if (offset >= 0)
7084 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7085 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7086 (long) offset);
7087 else
7088 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7089 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7090 (long) -offset);
7091 output_asm_insn (buf, operands);
7092 base_reg = regs[0];
7093 strcpy (buf, "ldm%(ia%)\t");
7094 break;
7096 default:
7097 gcc_unreachable ();
7100 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7101 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7103 for (i = 1; i < nops; i++)
7104 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7105 reg_names[regs[i]]);
7107 strcat (buf, "}\t%@ phole ldm");
7109 output_asm_insn (buf, operands);
7110 return "";
7114 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7115 HOST_WIDE_INT * load_offset)
7117 int unsorted_regs[4];
7118 HOST_WIDE_INT unsorted_offsets[4];
7119 int order[4];
7120 int base_reg = -1;
7121 int i;
7123 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7124 extended if required. */
7125 gcc_assert (nops >= 2 && nops <= 4);
7127 /* Loop over the operands and check that the memory references are
7128 suitable (i.e. immediate offsets from the same base register). At
7129 the same time, extract the target register, and the memory
7130 offsets. */
7131 for (i = 0; i < nops; i++)
7133 rtx reg;
7134 rtx offset;
7136 /* Convert a subreg of a mem into the mem itself. */
7137 if (GET_CODE (operands[nops + i]) == SUBREG)
7138 operands[nops + i] = alter_subreg (operands + (nops + i));
7140 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7142 /* Don't reorder volatile memory references; it doesn't seem worth
7143 looking for the case where the order is ok anyway. */
7144 if (MEM_VOLATILE_P (operands[nops + i]))
7145 return 0;
7147 offset = const0_rtx;
7149 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7150 || (GET_CODE (reg) == SUBREG
7151 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7152 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7153 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7154 == REG)
7155 || (GET_CODE (reg) == SUBREG
7156 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7157 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7158 == CONST_INT)))
7160 if (i == 0)
7162 base_reg = REGNO (reg);
7163 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7164 ? REGNO (operands[i])
7165 : REGNO (SUBREG_REG (operands[i])));
7166 order[0] = 0;
7168 else
7170 if (base_reg != (int) REGNO (reg))
7171 /* Not addressed from the same base register. */
7172 return 0;
7174 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7175 ? REGNO (operands[i])
7176 : REGNO (SUBREG_REG (operands[i])));
7177 if (unsorted_regs[i] < unsorted_regs[order[0]])
7178 order[0] = i;
7181 /* If it isn't an integer register, then we can't do this. */
7182 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7183 return 0;
7185 unsorted_offsets[i] = INTVAL (offset);
7187 else
7188 /* Not a suitable memory address. */
7189 return 0;
7192 /* All the useful information has now been extracted from the
7193 operands into unsorted_regs and unsorted_offsets; additionally,
7194 order[0] has been set to the lowest numbered register in the
7195 list. Sort the registers into order, and check that the memory
7196 offsets are ascending and adjacent. */
7198 for (i = 1; i < nops; i++)
7200 int j;
7202 order[i] = order[i - 1];
7203 for (j = 0; j < nops; j++)
7204 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7205 && (order[i] == order[i - 1]
7206 || unsorted_regs[j] < unsorted_regs[order[i]]))
7207 order[i] = j;
7209 /* Have we found a suitable register? If not, one must be used more
7210 than once. */
7211 if (order[i] == order[i - 1])
7212 return 0;
7214 /* Is the memory address adjacent and ascending? */
7215 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7216 return 0;
7219 if (base)
7221 *base = base_reg;
7223 for (i = 0; i < nops; i++)
7224 regs[i] = unsorted_regs[order[i]];
7226 *load_offset = unsorted_offsets[order[0]];
7229 if (unsorted_offsets[order[0]] == 0)
7230 return 1; /* stmia */
7232 if (unsorted_offsets[order[0]] == 4)
7233 return 2; /* stmib */
7235 if (unsorted_offsets[order[nops - 1]] == 0)
7236 return 3; /* stmda */
7238 if (unsorted_offsets[order[nops - 1]] == -4)
7239 return 4; /* stmdb */
7241 return 0;
7244 const char *
7245 emit_stm_seq (rtx *operands, int nops)
7247 int regs[4];
7248 int base_reg;
7249 HOST_WIDE_INT offset;
7250 char buf[100];
7251 int i;
7253 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7255 case 1:
7256 strcpy (buf, "stm%(ia%)\t");
7257 break;
7259 case 2:
7260 strcpy (buf, "stm%(ib%)\t");
7261 break;
7263 case 3:
7264 strcpy (buf, "stm%(da%)\t");
7265 break;
7267 case 4:
7268 strcpy (buf, "stm%(db%)\t");
7269 break;
7271 default:
7272 gcc_unreachable ();
7275 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7276 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7278 for (i = 1; i < nops; i++)
7279 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7280 reg_names[regs[i]]);
7282 strcat (buf, "}\t%@ phole stm");
7284 output_asm_insn (buf, operands);
7285 return "";
7288 /* Routines for use in generating RTL. */
7291 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7292 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7294 HOST_WIDE_INT offset = *offsetp;
7295 int i = 0, j;
7296 rtx result;
7297 int sign = up ? 1 : -1;
7298 rtx mem, addr;
7300 /* XScale has load-store double instructions, but they have stricter
7301 alignment requirements than load-store multiple, so we cannot
7302 use them.
7304 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7305 the pipeline until completion.
7307 NREGS CYCLES
1 3
2 4
3 5
4 6
7313 An ldr instruction takes 1-3 cycles, but does not block the
7314 pipeline.
7316 NREGS CYCLES
7317 1 1-3
7318 2 2-6
7319 3 3-9
7320 4 4-12
7322 Best case ldr will always win. However, the more ldr instructions
7323 we issue, the less likely we are to be able to schedule them well.
7324 Using ldr instructions also increases code size.
7326 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7327 for counts of 3 or 4 regs. */
7328 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7330 rtx seq;
7332 start_sequence ();
7334 for (i = 0; i < count; i++)
7336 addr = plus_constant (from, i * 4 * sign);
7337 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7338 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7339 offset += 4 * sign;
7342 if (write_back)
7344 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7345 *offsetp = offset;
7348 seq = get_insns ();
7349 end_sequence ();
7351 return seq;
7354 result = gen_rtx_PARALLEL (VOIDmode,
7355 rtvec_alloc (count + (write_back ? 1 : 0)));
7356 if (write_back)
7358 XVECEXP (result, 0, 0)
7359 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7360 i = 1;
7361 count++;
7364 for (j = 0; i < count; i++, j++)
7366 addr = plus_constant (from, j * 4 * sign);
7367 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7368 XVECEXP (result, 0, i)
7369 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7370 offset += 4 * sign;
7373 if (write_back)
7374 *offsetp = offset;
7376 return result;
7380 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7381 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7383 HOST_WIDE_INT offset = *offsetp;
7384 int i = 0, j;
7385 rtx result;
7386 int sign = up ? 1 : -1;
7387 rtx mem, addr;
7389 /* See arm_gen_load_multiple for discussion of
7390 the pros/cons of ldm/stm usage for XScale. */
7391 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7393 rtx seq;
7395 start_sequence ();
7397 for (i = 0; i < count; i++)
7399 addr = plus_constant (to, i * 4 * sign);
7400 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7401 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7402 offset += 4 * sign;
7405 if (write_back)
7407 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7408 *offsetp = offset;
7411 seq = get_insns ();
7412 end_sequence ();
7414 return seq;
7417 result = gen_rtx_PARALLEL (VOIDmode,
7418 rtvec_alloc (count + (write_back ? 1 : 0)));
7419 if (write_back)
7421 XVECEXP (result, 0, 0)
7422 = gen_rtx_SET (VOIDmode, to,
7423 plus_constant (to, count * 4 * sign));
7424 i = 1;
7425 count++;
7428 for (j = 0; i < count; i++, j++)
7430 addr = plus_constant (to, j * 4 * sign);
7431 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7432 XVECEXP (result, 0, i)
7433 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7434 offset += 4 * sign;
7437 if (write_back)
7438 *offsetp = offset;
7440 return result;
7444 arm_gen_movmemqi (rtx *operands)
7446 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7447 HOST_WIDE_INT srcoffset, dstoffset;
7448 int i;
7449 rtx src, dst, srcbase, dstbase;
7450 rtx part_bytes_reg = NULL;
7451 rtx mem;
7453 if (GET_CODE (operands[2]) != CONST_INT
7454 || GET_CODE (operands[3]) != CONST_INT
7455 || INTVAL (operands[2]) > 64
7456 || INTVAL (operands[3]) & 3)
7457 return 0;
7459 dstbase = operands[0];
7460 srcbase = operands[1];
7462 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7463 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7465 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7466 out_words_to_go = INTVAL (operands[2]) / 4;
7467 last_bytes = INTVAL (operands[2]) & 3;
7468 dstoffset = srcoffset = 0;
7470 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7471 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7473 for (i = 0; in_words_to_go >= 2; i+=4)
7475 if (in_words_to_go > 4)
7476 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7477 srcbase, &srcoffset));
7478 else
7479 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7480 FALSE, srcbase, &srcoffset));
7482 if (out_words_to_go)
7484 if (out_words_to_go > 4)
7485 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7486 dstbase, &dstoffset));
7487 else if (out_words_to_go != 1)
7488 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7489 dst, TRUE,
7490 (last_bytes == 0
7491 ? FALSE : TRUE),
7492 dstbase, &dstoffset));
7493 else
7495 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7496 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7497 if (last_bytes != 0)
7499 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7500 dstoffset += 4;
7505 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7506 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7509 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7510 if (out_words_to_go)
7512 rtx sreg;
7514 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7515 sreg = copy_to_reg (mem);
7517 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7518 emit_move_insn (mem, sreg);
7519 in_words_to_go--;
7521 gcc_assert (!in_words_to_go); /* Sanity check */
7524 if (in_words_to_go)
7526 gcc_assert (in_words_to_go > 0);
7528 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7529 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7532 gcc_assert (!last_bytes || part_bytes_reg);
7534 if (BYTES_BIG_ENDIAN && last_bytes)
7536 rtx tmp = gen_reg_rtx (SImode);
7538 /* The bytes we want are in the top end of the word. */
7539 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7540 GEN_INT (8 * (4 - last_bytes))));
7541 part_bytes_reg = tmp;
7543 while (last_bytes)
7545 mem = adjust_automodify_address (dstbase, QImode,
7546 plus_constant (dst, last_bytes - 1),
7547 dstoffset + last_bytes - 1);
7548 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7550 if (--last_bytes)
7552 tmp = gen_reg_rtx (SImode);
7553 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7554 part_bytes_reg = tmp;
7559 else
7561 if (last_bytes > 1)
7563 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7564 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7565 last_bytes -= 2;
7566 if (last_bytes)
7568 rtx tmp = gen_reg_rtx (SImode);
7569 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7570 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7571 part_bytes_reg = tmp;
7572 dstoffset += 2;
7576 if (last_bytes)
7578 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7579 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7583 return 1;
7586 /* Select a dominance comparison mode if possible for a test of the general
7587 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7588 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7589 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7590 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7591 In all cases OP will be either EQ or NE, but we don't need to know which
7592 here. If we are unable to support a dominance comparison we return
7593 CC mode. This will then fail to match for the RTL expressions that
7594 generate this call. */
7595 enum machine_mode
7596 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7598 enum rtx_code cond1, cond2;
7599 int swapped = 0;
7601 /* Currently we will probably get the wrong result if the individual
7602 comparisons are not simple. This also ensures that it is safe to
7603 reverse a comparison if necessary. */
7604 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7605 != CCmode)
7606 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7607 != CCmode))
7608 return CCmode;
7610 /* The if_then_else variant of this tests the second condition if the
7611 first passes, but is true if the first fails. Reverse the first
7612 condition to get a true "inclusive-or" expression. */
7613 if (cond_or == DOM_CC_NX_OR_Y)
7614 cond1 = reverse_condition (cond1);
7616 /* If the comparisons are not equal, and one doesn't dominate the other,
7617 then we can't do this. */
7618 if (cond1 != cond2
7619 && !comparison_dominates_p (cond1, cond2)
7620 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7621 return CCmode;
7623 if (swapped)
7625 enum rtx_code temp = cond1;
7626 cond1 = cond2;
7627 cond2 = temp;
7630 switch (cond1)
7632 case EQ:
7633 if (cond_or == DOM_CC_X_AND_Y)
7634 return CC_DEQmode;
7636 switch (cond2)
7638 case EQ: return CC_DEQmode;
7639 case LE: return CC_DLEmode;
7640 case LEU: return CC_DLEUmode;
7641 case GE: return CC_DGEmode;
7642 case GEU: return CC_DGEUmode;
7643 default: gcc_unreachable ();
7646 case LT:
7647 if (cond_or == DOM_CC_X_AND_Y)
7648 return CC_DLTmode;
7650 switch (cond2)
7652 case LT:
7653 return CC_DLTmode;
7654 case LE:
7655 return CC_DLEmode;
7656 case NE:
7657 return CC_DNEmode;
7658 default:
7659 gcc_unreachable ();
7662 case GT:
7663 if (cond_or == DOM_CC_X_AND_Y)
7664 return CC_DGTmode;
7666 switch (cond2)
7668 case GT:
7669 return CC_DGTmode;
7670 case GE:
7671 return CC_DGEmode;
7672 case NE:
7673 return CC_DNEmode;
7674 default:
7675 gcc_unreachable ();
7678 case LTU:
7679 if (cond_or == DOM_CC_X_AND_Y)
7680 return CC_DLTUmode;
7682 switch (cond2)
7684 case LTU:
7685 return CC_DLTUmode;
7686 case LEU:
7687 return CC_DLEUmode;
7688 case NE:
7689 return CC_DNEmode;
7690 default:
7691 gcc_unreachable ();
7694 case GTU:
7695 if (cond_or == DOM_CC_X_AND_Y)
7696 return CC_DGTUmode;
7698 switch (cond2)
7700 case GTU:
7701 return CC_DGTUmode;
7702 case GEU:
7703 return CC_DGEUmode;
7704 case NE:
7705 return CC_DNEmode;
7706 default:
7707 gcc_unreachable ();
7710 /* The remaining cases only occur when both comparisons are the
7711 same. */
7712 case NE:
7713 gcc_assert (cond1 == cond2);
7714 return CC_DNEmode;
7716 case LE:
7717 gcc_assert (cond1 == cond2);
7718 return CC_DLEmode;
7720 case GE:
7721 gcc_assert (cond1 == cond2);
7722 return CC_DGEmode;
7724 case LEU:
7725 gcc_assert (cond1 == cond2);
7726 return CC_DLEUmode;
7728 case GEU:
7729 gcc_assert (cond1 == cond2);
7730 return CC_DGEUmode;
7732 default:
7733 gcc_unreachable ();
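/* Example (illustrative): with X = (eq r0 r1), Y = (eq r2 r3) and
   COND_OR == DOM_CC_X_OR_Y, both sub-comparisons are EQ, no swap is
   needed and the function returns CC_DEQmode.  */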
7737 enum machine_mode
7738 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7740 /* All floating point compares return CCFP if it is an equality
7741 comparison, and CCFPE otherwise. */
7742 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7744 switch (op)
7746 case EQ:
7747 case NE:
7748 case UNORDERED:
7749 case ORDERED:
7750 case UNLT:
7751 case UNLE:
7752 case UNGT:
7753 case UNGE:
7754 case UNEQ:
7755 case LTGT:
7756 return CCFPmode;
7758 case LT:
7759 case LE:
7760 case GT:
7761 case GE:
7762 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7763 return CCFPmode;
7764 return CCFPEmode;
7766 default:
7767 gcc_unreachable ();
7771 /* A compare with a shifted operand. Because of canonicalization, the
7772 comparison will have to be swapped when we emit the assembler. */
7773 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7774 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7775 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7776 || GET_CODE (x) == ROTATERT))
7777 return CC_SWPmode;
7779 /* This operation is performed swapped, but since we only rely on the Z
7780 flag we don't need an additional mode. */
7781 if (GET_MODE (y) == SImode && REG_P (y)
7782 && GET_CODE (x) == NEG
7783 && (op == EQ || op == NE))
7784 return CC_Zmode;
7786 /* This is a special case that is used by combine to allow a
7787 comparison of a shifted byte load to be split into a zero-extend
7788 followed by a comparison of the shifted integer (only valid for
7789 equalities and unsigned inequalities). */
7790 if (GET_MODE (x) == SImode
7791 && GET_CODE (x) == ASHIFT
7792 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7793 && GET_CODE (XEXP (x, 0)) == SUBREG
7794 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7795 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7796 && (op == EQ || op == NE
7797 || op == GEU || op == GTU || op == LTU || op == LEU)
7798 && GET_CODE (y) == CONST_INT)
7799 return CC_Zmode;
7801 /* A construct for a conditional compare, if the false arm contains
7802 0, then both conditions must be true, otherwise either condition
7803 must be true. Not all conditions are possible, so CCmode is
7804 returned if it can't be done. */
7805 if (GET_CODE (x) == IF_THEN_ELSE
7806 && (XEXP (x, 2) == const0_rtx
7807 || XEXP (x, 2) == const1_rtx)
7808 && COMPARISON_P (XEXP (x, 0))
7809 && COMPARISON_P (XEXP (x, 1)))
7810 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7811 INTVAL (XEXP (x, 2)));
7813 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7814 if (GET_CODE (x) == AND
7815 && COMPARISON_P (XEXP (x, 0))
7816 && COMPARISON_P (XEXP (x, 1)))
7817 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7818 DOM_CC_X_AND_Y);
7820 if (GET_CODE (x) == IOR
7821 && COMPARISON_P (XEXP (x, 0))
7822 && COMPARISON_P (XEXP (x, 1)))
7823 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7824 DOM_CC_X_OR_Y);
7826 /* An operation (on Thumb) where we want to test for a single bit.
7827 This is done by shifting that bit up into the top bit of a
7828 scratch register; we can then branch on the sign bit. */
7829 if (TARGET_THUMB1
7830 && GET_MODE (x) == SImode
7831 && (op == EQ || op == NE)
7832 && GET_CODE (x) == ZERO_EXTRACT
7833 && XEXP (x, 1) == const1_rtx)
7834 return CC_Nmode;
7836 /* An operation that sets the condition codes as a side-effect, the
7837 V flag is not set correctly, so we can only use comparisons where
7838 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7839 instead.) */
7840 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7841 if (GET_MODE (x) == SImode
7842 && y == const0_rtx
7843 && (op == EQ || op == NE || op == LT || op == GE)
7844 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7845 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7846 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7847 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7848 || GET_CODE (x) == LSHIFTRT
7849 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7850 || GET_CODE (x) == ROTATERT
7851 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7852 return CC_NOOVmode;
7854 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7855 return CC_Zmode;
7857 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7858 && GET_CODE (x) == PLUS
7859 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7860 return CC_Cmode;
7862 return CCmode;
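/* Example (illustrative): SELECT_CC_MODE for (ltu (plus r0 r1) r0)
   yields CC_Cmode -- the unsigned-overflow test of an addition only
   needs the carry flag.  */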
7865 /* X and Y are two things to compare using CODE. Emit the compare insn and
7866 return the rtx for register 0 in the proper mode. FP means this is a
7867 floating point compare: I don't think that it is needed on the arm. */
7869 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7871 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7872 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7874 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7876 return cc_reg;
7879 /* Generate a sequence of insns that will generate the correct return
7880 address mask depending on the physical architecture that the program
7881 is running on. */
7883 arm_gen_return_addr_mask (void)
7885 rtx reg = gen_reg_rtx (Pmode);
7887 emit_insn (gen_return_addr_mask (reg));
7888 return reg;
7891 void
7892 arm_reload_in_hi (rtx *operands)
7894 rtx ref = operands[1];
7895 rtx base, scratch;
7896 HOST_WIDE_INT offset = 0;
7898 if (GET_CODE (ref) == SUBREG)
7900 offset = SUBREG_BYTE (ref);
7901 ref = SUBREG_REG (ref);
7904 if (GET_CODE (ref) == REG)
7906 /* We have a pseudo which has been spilt onto the stack; there
7907 are two cases here: the first where there is a simple
7908 stack-slot replacement and a second where the stack-slot is
7909 out of range, or is used as a subreg. */
7910 if (reg_equiv_mem[REGNO (ref)])
7912 ref = reg_equiv_mem[REGNO (ref)];
7913 base = find_replacement (&XEXP (ref, 0));
7915 else
7916 /* The slot is out of range, or was dressed up in a SUBREG. */
7917 base = reg_equiv_address[REGNO (ref)];
7919 else
7920 base = find_replacement (&XEXP (ref, 0));
7922 /* Handle the case where the address is too complex to be offset by 1. */
7923 if (GET_CODE (base) == MINUS
7924 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7926 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7928 emit_set_insn (base_plus, base);
7929 base = base_plus;
7931 else if (GET_CODE (base) == PLUS)
7933 /* The addend must be CONST_INT, or we would have dealt with it above. */
7934 HOST_WIDE_INT hi, lo;
7936 offset += INTVAL (XEXP (base, 1));
7937 base = XEXP (base, 0);
7939 /* Rework the address into a legal sequence of insns. */
7940 /* Valid range for lo is -4095 -> 4095 */
7941 lo = (offset >= 0
7942 ? (offset & 0xfff)
7943 : -((-offset) & 0xfff));
7945 /* Corner case: if lo is the maximum offset, then we would be out of range
7946 once we have added the additional 1 below, so bump the msb into the
7947 pre-loading insn(s). */
7948 if (lo == 4095)
7949 lo &= 0x7ff;
7951 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7952 ^ (HOST_WIDE_INT) 0x80000000)
7953 - (HOST_WIDE_INT) 0x80000000);
7955 gcc_assert (hi + lo == offset);
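/* A worked example of the (hi, lo) split above: offset = 4095 gives
   lo = 0x7ff (2047) after the corner-case adjustment and hi = 0x800 (2048);
   offset = -4097 gives lo = -1 and hi = -4096.  In both cases hi + lo
   reproduces the original offset, and both lo and lo + 1 stay within the
   +/-4095 range of a byte load.  */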
7957 if (hi != 0)
7959 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7961 /* Get the base address; addsi3 knows how to handle constants
7962 that require more than one insn. */
7963 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7964 base = base_plus;
7965 offset = lo;
7969 /* Operands[2] may overlap operands[0] (though it won't overlap
7970 operands[1]); that's why we asked for a DImode reg -- so we can
7971 use the half that does not overlap. */
7972 if (REGNO (operands[2]) == REGNO (operands[0]))
7973 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7974 else
7975 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7977 emit_insn (gen_zero_extendqisi2 (scratch,
7978 gen_rtx_MEM (QImode,
7979 plus_constant (base,
7980 offset))));
7981 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7982 gen_rtx_MEM (QImode,
7983 plus_constant (base,
7984 offset + 1))));
7985 if (!BYTES_BIG_ENDIAN)
7986 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7987 gen_rtx_IOR (SImode,
7988 gen_rtx_ASHIFT
7989 (SImode,
7990 gen_rtx_SUBREG (SImode, operands[0], 0),
7991 GEN_INT (8)),
7992 scratch));
7993 else
7994 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7995 gen_rtx_IOR (SImode,
7996 gen_rtx_ASHIFT (SImode, scratch,
7997 GEN_INT (8)),
7998 gen_rtx_SUBREG (SImode, operands[0], 0)));
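/* For the little-endian case the insns emitted above amount to roughly the
   following sequence (a sketch; the actual registers come from the DImode
   scratch in operands[2]):
       ldrb    scratch, [base, #lo]          @ low-addressed byte
       ldrb    dest,    [base, #lo + 1]      @ high-addressed byte
       orr     dest, scratch, dest, lsl #8   @ dest = (dest << 8) | scratch
   The big-endian path simply swaps which byte ends up in bits 8-15.  */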
8001 /* Handle storing a half-word to memory during reload by synthesizing it as two
8002 byte stores. Take care not to clobber the input values until after we
8003 have moved them somewhere safe. This code assumes that if the DImode
8004 scratch in operands[2] overlaps either the input value or output address
8005 in some way, then that value must die in this insn (we absolutely need
8006 two scratch registers for some corner cases). */
8007 void
8008 arm_reload_out_hi (rtx *operands)
8010 rtx ref = operands[0];
8011 rtx outval = operands[1];
8012 rtx base, scratch;
8013 HOST_WIDE_INT offset = 0;
8015 if (GET_CODE (ref) == SUBREG)
8017 offset = SUBREG_BYTE (ref);
8018 ref = SUBREG_REG (ref);
8021 if (GET_CODE (ref) == REG)
8023 /* We have a pseudo which has been spilt onto the stack; there
8024 are two cases here: the first where there is a simple
8025 stack-slot replacement and a second where the stack-slot is
8026 out of range, or is used as a subreg. */
8027 if (reg_equiv_mem[REGNO (ref)])
8029 ref = reg_equiv_mem[REGNO (ref)];
8030 base = find_replacement (&XEXP (ref, 0));
8032 else
8033 /* The slot is out of range, or was dressed up in a SUBREG. */
8034 base = reg_equiv_address[REGNO (ref)];
8036 else
8037 base = find_replacement (&XEXP (ref, 0));
8039 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8041 /* Handle the case where the address is too complex to be offset by 1. */
8042 if (GET_CODE (base) == MINUS
8043 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8045 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8047 /* Be careful not to destroy OUTVAL. */
8048 if (reg_overlap_mentioned_p (base_plus, outval))
8050 /* Updating base_plus might destroy outval; see if we can
8051 swap the scratch and base_plus. */
8052 if (!reg_overlap_mentioned_p (scratch, outval))
8054 rtx tmp = scratch;
8055 scratch = base_plus;
8056 base_plus = tmp;
8058 else
8060 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8062 /* Be conservative and copy OUTVAL into the scratch now;
8063 this should only be necessary if outval is a subreg
8064 of something larger than a word. */
8065 /* XXX Might this clobber base? I can't see how it can,
8066 since scratch is known to overlap with OUTVAL, and
8067 must be wider than a word. */
8068 emit_insn (gen_movhi (scratch_hi, outval));
8069 outval = scratch_hi;
8073 emit_set_insn (base_plus, base);
8074 base = base_plus;
8076 else if (GET_CODE (base) == PLUS)
8078 /* The addend must be CONST_INT, or we would have dealt with it above. */
8079 HOST_WIDE_INT hi, lo;
8081 offset += INTVAL (XEXP (base, 1));
8082 base = XEXP (base, 0);
8084 /* Rework the address into a legal sequence of insns. */
8085 /* Valid range for lo is -4095 -> 4095 */
8086 lo = (offset >= 0
8087 ? (offset & 0xfff)
8088 : -((-offset) & 0xfff));
8090 /* Corner case: if lo is the maximum offset, then we would be out of range
8091 once we have added the additional 1 below, so bump the msb into the
8092 pre-loading insn(s). */
8093 if (lo == 4095)
8094 lo &= 0x7ff;
8096 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8097 ^ (HOST_WIDE_INT) 0x80000000)
8098 - (HOST_WIDE_INT) 0x80000000);
8100 gcc_assert (hi + lo == offset);
8102 if (hi != 0)
8104 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8106 /* Be careful not to destroy OUTVAL. */
8107 if (reg_overlap_mentioned_p (base_plus, outval))
8109 /* Updating base_plus might destroy outval; see if we
8110 can swap the scratch and base_plus. */
8111 if (!reg_overlap_mentioned_p (scratch, outval))
8113 rtx tmp = scratch;
8114 scratch = base_plus;
8115 base_plus = tmp;
8117 else
8119 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8121 /* Be conservative and copy outval into scratch now;
8122 this should only be necessary if outval is a
8123 subreg of something larger than a word. */
8124 /* XXX Might this clobber base? I can't see how it
8125 can, since scratch is known to overlap with
8126 outval. */
8127 emit_insn (gen_movhi (scratch_hi, outval));
8128 outval = scratch_hi;
8132 /* Get the base address; addsi3 knows how to handle constants
8133 that require more than one insn. */
8134 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8135 base = base_plus;
8136 offset = lo;
8140 if (BYTES_BIG_ENDIAN)
8142 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8143 plus_constant (base, offset + 1)),
8144 gen_lowpart (QImode, outval)));
8145 emit_insn (gen_lshrsi3 (scratch,
8146 gen_rtx_SUBREG (SImode, outval, 0),
8147 GEN_INT (8)));
8148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8149 gen_lowpart (QImode, scratch)));
8151 else
8153 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8154 gen_lowpart (QImode, outval)));
8155 emit_insn (gen_lshrsi3 (scratch,
8156 gen_rtx_SUBREG (SImode, outval, 0),
8157 GEN_INT (8)));
8158 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8159 plus_constant (base, offset + 1)),
8160 gen_lowpart (QImode, scratch)));
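/* The little-endian branch above therefore emits roughly (a sketch):
       strb    outval,  [base, #lo]          @ low byte at the low address
       mov     scratch, outval, lsr #8
       strb    scratch, [base, #lo + 1]      @ high byte at the high address
   and the big-endian branch stores the same two bytes with the addresses
   swapped.  */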
8164 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8165 (padded to the size of a word) should be passed in a register. */
8167 static bool
8168 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8170 if (TARGET_AAPCS_BASED)
8171 return must_pass_in_stack_var_size (mode, type);
8172 else
8173 return must_pass_in_stack_var_size_or_pad (mode, type);
8177 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8178 Return true if an argument passed on the stack should be padded upwards,
8179 i.e. if the least-significant byte has useful data.
8180 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8181 aggregate types are placed in the lowest memory address. */
8183 bool
8184 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8186 if (!TARGET_AAPCS_BASED)
8187 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8189 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8190 return false;
8192 return true;
8196 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8197 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8198 byte of the register has useful data, and return the opposite if the
8199 most significant byte does.
8200 For AAPCS, small aggregates and small complex types are always padded
8201 upwards. */
8203 bool
8204 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8205 tree type, int first ATTRIBUTE_UNUSED)
8207 if (TARGET_AAPCS_BASED
8208 && BYTES_BIG_ENDIAN
8209 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8210 && int_size_in_bytes (type) <= 4)
8211 return true;
8213 /* Otherwise, use default padding. */
8214 return !BYTES_BIG_ENDIAN;
8218 /* Print a symbolic form of X to the debug file, F. */
8219 static void
8220 arm_print_value (FILE *f, rtx x)
8222 switch (GET_CODE (x))
8224 case CONST_INT:
8225 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8226 return;
8228 case CONST_DOUBLE:
8229 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8230 return;
8232 case CONST_VECTOR:
8234 int i;
8236 fprintf (f, "<");
8237 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8239 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8240 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8241 fputc (',', f);
8243 fprintf (f, ">");
8245 return;
8247 case CONST_STRING:
8248 fprintf (f, "\"%s\"", XSTR (x, 0));
8249 return;
8251 case SYMBOL_REF:
8252 fprintf (f, "`%s'", XSTR (x, 0));
8253 return;
8255 case LABEL_REF:
8256 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8257 return;
8259 case CONST:
8260 arm_print_value (f, XEXP (x, 0));
8261 return;
8263 case PLUS:
8264 arm_print_value (f, XEXP (x, 0));
8265 fprintf (f, "+");
8266 arm_print_value (f, XEXP (x, 1));
8267 return;
8269 case PC:
8270 fprintf (f, "pc");
8271 return;
8273 default:
8274 fprintf (f, "????");
8275 return;
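/* As an example of the output format, the rtx
   (const (plus (symbol_ref "foo") (const_int 12))) is printed by this
   routine as `foo'+0xc.  */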
8279 /* Routines for manipulation of the constant pool. */
8281 /* Arm instructions cannot load a large constant directly into a
8282 register; they have to come from a pc relative load. The constant
8283 must therefore be placed in the addressable range of the pc
8284 relative load. Depending on the precise pc relative load
8285 instruction the range is somewhere between 256 bytes and 4k. This
8286 means that we often have to dump a constant inside a function, and
8287 generate code to branch around it.
8289 It is important to minimize this, since the branches will slow
8290 things down and make the code larger.
8292 Normally we can hide the table after an existing unconditional
8293 branch so that there is no interruption of the flow, but in the
8294 worst case the code looks like this:
8296 ldr rn, L1
8298 b L2
8299 align
8300 L1: .long value
8304 ldr rn, L3
8306 b L4
8307 align
8308 L3: .long value
8312 We fix this by performing a scan after scheduling, which notices
8313 which instructions need to have their operands fetched from the
8314 constant table and builds the table.
8316 The algorithm starts by building a table of all the constants that
8317 need fixing up and all the natural barriers in the function (places
8318 where a constant table can be dropped without breaking the flow).
8319 For each fixup we note how far the pc-relative replacement will be
8320 able to reach and the offset of the instruction into the function.
8322 Having built the table we then group the fixes together to form
8323 tables that are as large as possible (subject to addressing
8324 constraints) and emit each table of constants after the last
8325 barrier that is within range of all the instructions in the group.
8326 If a group does not contain a barrier, then we forcibly create one
8327 by inserting a jump instruction into the flow. Once the table has
8328 been inserted, the insns are then modified to reference the
8329 relevant entry in the pool.
8331 Possible enhancements to the algorithm (not implemented) are:
8333 1) For some processors and object formats, there may be benefit in
8334 aligning the pools to the start of cache lines; this alignment
8335 would need to be taken into account when calculating addressability
8336 of a pool. */
8338 /* These typedefs are located at the start of this file, so that
8339 they can be used in the prototypes there. This comment is to
8340 remind readers of that fact so that the following structures
8341 can be understood more easily.
8343 typedef struct minipool_node Mnode;
8344 typedef struct minipool_fixup Mfix; */
8346 struct minipool_node
8348 /* Doubly linked chain of entries. */
8349 Mnode * next;
8350 Mnode * prev;
8351 /* The maximum offset into the code at which this entry can be placed. While
8352 pushing fixes for forward references, all entries are sorted in order
8353 of increasing max_address. */
8354 HOST_WIDE_INT max_address;
8355 /* Similarly for an entry inserted for a backwards ref. */
8356 HOST_WIDE_INT min_address;
8357 /* The number of fixes referencing this entry. This can become zero
8358 if we "unpush" an entry. In this case we ignore the entry when we
8359 come to emit the code. */
8360 int refcount;
8361 /* The offset from the start of the minipool. */
8362 HOST_WIDE_INT offset;
8363 /* The value in the table. */
8364 rtx value;
8365 /* The mode of value. */
8366 enum machine_mode mode;
8367 /* The size of the value. With iWMMXt enabled
8368 sizes > 4 also imply an alignment of 8 bytes. */
8369 int fix_size;
8372 struct minipool_fixup
8374 Mfix * next;
8375 rtx insn;
8376 HOST_WIDE_INT address;
8377 rtx * loc;
8378 enum machine_mode mode;
8379 int fix_size;
8380 rtx value;
8381 Mnode * minipool;
8382 HOST_WIDE_INT forwards;
8383 HOST_WIDE_INT backwards;
8386 /* Fixes less than a word need padding out to a word boundary. */
8387 #define MINIPOOL_FIX_SIZE(mode) \
8388 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
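/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   are both 4 (padded up to a word), while MINIPOOL_FIX_SIZE (DImode) is 8
   and MINIPOOL_FIX_SIZE (V4SImode) is 16.  */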
8390 static Mnode * minipool_vector_head;
8391 static Mnode * minipool_vector_tail;
8392 static rtx minipool_vector_label;
8393 static int minipool_pad;
8395 /* The linked list of all minipool fixes required for this function. */
8396 Mfix * minipool_fix_head;
8397 Mfix * minipool_fix_tail;
8398 /* The fix entry for the current minipool, once it has been placed. */
8399 Mfix * minipool_barrier;
8401 /* Determines if INSN is the start of a jump table. Returns the end
8402 of the TABLE or NULL_RTX. */
8403 static rtx
8404 is_jump_table (rtx insn)
8406 rtx table;
8408 if (GET_CODE (insn) == JUMP_INSN
8409 && JUMP_LABEL (insn) != NULL
8410 && ((table = next_real_insn (JUMP_LABEL (insn)))
8411 == next_real_insn (insn))
8412 && table != NULL
8413 && GET_CODE (table) == JUMP_INSN
8414 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8415 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8416 return table;
8418 return NULL_RTX;
8421 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8422 #define JUMP_TABLES_IN_TEXT_SECTION 0
8423 #endif
8425 static HOST_WIDE_INT
8426 get_jump_table_size (rtx insn)
8428 /* ADDR_VECs only take room if read-only data goes into the text
8429 section. */
8430 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8432 rtx body = PATTERN (insn);
8433 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8434 HOST_WIDE_INT size;
8435 HOST_WIDE_INT modesize;
8437 modesize = GET_MODE_SIZE (GET_MODE (body));
8438 size = modesize * XVECLEN (body, elt);
8439 switch (modesize)
8441 case 1:
8442 /* Round up size of TBB table to a halfword boundary. */
8443 size = (size + 1) & ~(HOST_WIDE_INT)1;
8444 break;
8445 case 2:
8446 /* No padding necessary for TBH. */
8447 break;
8448 case 4:
8449 /* Add two bytes for alignment on Thumb. */
8450 if (TARGET_THUMB)
8451 size += 2;
8452 break;
8453 default:
8454 gcc_unreachable ();
8456 return size;
8459 return 0;
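/* Worked examples, assuming the table lives in the text section: a QImode
   (TBB-style) ADDR_DIFF_VEC with 7 entries occupies 7 bytes rounded up to 8;
   an HImode (TBH) table with 5 entries occupies 10 bytes; an SImode table
   with 3 entries occupies 12 bytes, plus 2 bytes of alignment padding when
   compiling for Thumb.  */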
8462 /* Move a minipool fix MP from its current location to before MAX_MP.
8463 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8464 constraints may need updating. */
8465 static Mnode *
8466 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8467 HOST_WIDE_INT max_address)
8469 /* The code below assumes these are different. */
8470 gcc_assert (mp != max_mp);
8472 if (max_mp == NULL)
8474 if (max_address < mp->max_address)
8475 mp->max_address = max_address;
8477 else
8479 if (max_address > max_mp->max_address - mp->fix_size)
8480 mp->max_address = max_mp->max_address - mp->fix_size;
8481 else
8482 mp->max_address = max_address;
8484 /* Unlink MP from its current position. Since max_mp is non-null,
8485 mp->prev must be non-null. */
8486 mp->prev->next = mp->next;
8487 if (mp->next != NULL)
8488 mp->next->prev = mp->prev;
8489 else
8490 minipool_vector_tail = mp->prev;
8492 /* Re-insert it before MAX_MP. */
8493 mp->next = max_mp;
8494 mp->prev = max_mp->prev;
8495 max_mp->prev = mp;
8497 if (mp->prev != NULL)
8498 mp->prev->next = mp;
8499 else
8500 minipool_vector_head = mp;
8503 /* Save the new entry. */
8504 max_mp = mp;
8506 /* Scan over the preceding entries and adjust their addresses as
8507 required. */
8508 while (mp->prev != NULL
8509 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8511 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8512 mp = mp->prev;
8515 return max_mp;
8518 /* Add a constant to the minipool for a forward reference. Returns the
8519 node added or NULL if the constant will not fit in this pool. */
8520 static Mnode *
8521 add_minipool_forward_ref (Mfix *fix)
8523 /* If set, max_mp is the first pool_entry that has a lower
8524 constraint than the one we are trying to add. */
8525 Mnode * max_mp = NULL;
8526 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8527 Mnode * mp;
8529 /* If the minipool starts before the end of FIX->INSN then this FIX
8530 cannot be placed into the current pool. Furthermore, adding the
8531 new constant pool entry may cause the pool to start FIX_SIZE bytes
8532 earlier. */
8533 if (minipool_vector_head &&
8534 (fix->address + get_attr_length (fix->insn)
8535 >= minipool_vector_head->max_address - fix->fix_size))
8536 return NULL;
8538 /* Scan the pool to see if a constant with the same value has
8539 already been added. While we are doing this, also note the
8540 location where we must insert the constant if it doesn't already
8541 exist. */
8542 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8544 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8545 && fix->mode == mp->mode
8546 && (GET_CODE (fix->value) != CODE_LABEL
8547 || (CODE_LABEL_NUMBER (fix->value)
8548 == CODE_LABEL_NUMBER (mp->value)))
8549 && rtx_equal_p (fix->value, mp->value))
8551 /* More than one fix references this entry. */
8552 mp->refcount++;
8553 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8556 /* Note the insertion point if necessary. */
8557 if (max_mp == NULL
8558 && mp->max_address > max_address)
8559 max_mp = mp;
8561 /* If we are inserting an 8-byte aligned quantity and
8562 we have not already found an insertion point, then
8563 make sure that all such 8-byte aligned quantities are
8564 placed at the start of the pool. */
8565 if (ARM_DOUBLEWORD_ALIGN
8566 && max_mp == NULL
8567 && fix->fix_size >= 8
8568 && mp->fix_size < 8)
8570 max_mp = mp;
8571 max_address = mp->max_address;
8575 /* The value is not currently in the minipool, so we need to create
8576 a new entry for it. If MAX_MP is NULL, the entry will be put on
8577 the end of the list since the placement is less constrained than
8578 any existing entry. Otherwise, we insert the new fix before
8579 MAX_MP and, if necessary, adjust the constraints on the other
8580 entries. */
8581 mp = XNEW (Mnode);
8582 mp->fix_size = fix->fix_size;
8583 mp->mode = fix->mode;
8584 mp->value = fix->value;
8585 mp->refcount = 1;
8586 /* Not yet required for a backwards ref. */
8587 mp->min_address = -65536;
8589 if (max_mp == NULL)
8591 mp->max_address = max_address;
8592 mp->next = NULL;
8593 mp->prev = minipool_vector_tail;
8595 if (mp->prev == NULL)
8597 minipool_vector_head = mp;
8598 minipool_vector_label = gen_label_rtx ();
8600 else
8601 mp->prev->next = mp;
8603 minipool_vector_tail = mp;
8605 else
8607 if (max_address > max_mp->max_address - mp->fix_size)
8608 mp->max_address = max_mp->max_address - mp->fix_size;
8609 else
8610 mp->max_address = max_address;
8612 mp->next = max_mp;
8613 mp->prev = max_mp->prev;
8614 max_mp->prev = mp;
8615 if (mp->prev != NULL)
8616 mp->prev->next = mp;
8617 else
8618 minipool_vector_head = mp;
8621 /* Save the new entry. */
8622 max_mp = mp;
8624 /* Scan over the preceding entries and adjust their addresses as
8625 required. */
8626 while (mp->prev != NULL
8627 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8629 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8630 mp = mp->prev;
8633 return max_mp;
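/* A worked example of the forward constraint (a sketch): for a fix at
   address 1000 whose pool_range attribute is 4096, with minipool_pad of 4,
   max_address is 1000 + 4096 - 4 = 5092.  The new entry must be emitted
   before that address, and the final loop above may tighten the
   max_address of entries that now sit in front of it.  */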
8636 static Mnode *
8637 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8638 HOST_WIDE_INT min_address)
8640 HOST_WIDE_INT offset;
8642 /* The code below assumes these are different. */
8643 gcc_assert (mp != min_mp);
8645 if (min_mp == NULL)
8647 if (min_address > mp->min_address)
8648 mp->min_address = min_address;
8650 else
8652 /* We will adjust this below if it is too loose. */
8653 mp->min_address = min_address;
8655 /* Unlink MP from its current position. Since min_mp is non-null,
8656 mp->next must be non-null. */
8657 mp->next->prev = mp->prev;
8658 if (mp->prev != NULL)
8659 mp->prev->next = mp->next;
8660 else
8661 minipool_vector_head = mp->next;
8663 /* Reinsert it after MIN_MP. */
8664 mp->prev = min_mp;
8665 mp->next = min_mp->next;
8666 min_mp->next = mp;
8667 if (mp->next != NULL)
8668 mp->next->prev = mp;
8669 else
8670 minipool_vector_tail = mp;
8673 min_mp = mp;
8675 offset = 0;
8676 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8678 mp->offset = offset;
8679 if (mp->refcount > 0)
8680 offset += mp->fix_size;
8682 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8683 mp->next->min_address = mp->min_address + mp->fix_size;
8686 return min_mp;
8689 /* Add a constant to the minipool for a backward reference. Returns the
8690 node added or NULL if the constant will not fit in this pool.
8692 Note that the code for insertion for a backwards reference can be
8693 somewhat confusing because the calculated offsets for each fix do
8694 not take into account the size of the pool (which is still under
8695 construction). */
8696 static Mnode *
8697 add_minipool_backward_ref (Mfix *fix)
8699 /* If set, min_mp is the last pool_entry that has a lower constraint
8700 than the one we are trying to add. */
8701 Mnode *min_mp = NULL;
8702 /* This can be negative, since it is only a constraint. */
8703 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8704 Mnode *mp;
8706 /* If we can't reach the current pool from this insn, or if we can't
8707 insert this entry at the end of the pool without pushing other
8708 fixes out of range, then we don't try. This ensures that we
8709 can't fail later on. */
8710 if (min_address >= minipool_barrier->address
8711 || (minipool_vector_tail->min_address + fix->fix_size
8712 >= minipool_barrier->address))
8713 return NULL;
8715 /* Scan the pool to see if a constant with the same value has
8716 already been added. While we are doing this, also note the
8717 location where we must insert the constant if it doesn't already
8718 exist. */
8719 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8721 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8722 && fix->mode == mp->mode
8723 && (GET_CODE (fix->value) != CODE_LABEL
8724 || (CODE_LABEL_NUMBER (fix->value)
8725 == CODE_LABEL_NUMBER (mp->value)))
8726 && rtx_equal_p (fix->value, mp->value)
8727 /* Check that there is enough slack to move this entry to the
8728 end of the table (this is conservative). */
8729 && (mp->max_address
8730 > (minipool_barrier->address
8731 + minipool_vector_tail->offset
8732 + minipool_vector_tail->fix_size)))
8734 mp->refcount++;
8735 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8738 if (min_mp != NULL)
8739 mp->min_address += fix->fix_size;
8740 else
8742 /* Note the insertion point if necessary. */
8743 if (mp->min_address < min_address)
8745 /* For now, we do not allow the insertion of nodes requiring 8-byte
8746 alignment anywhere but at the start of the pool. */
8747 if (ARM_DOUBLEWORD_ALIGN
8748 && fix->fix_size >= 8 && mp->fix_size < 8)
8749 return NULL;
8750 else
8751 min_mp = mp;
8753 else if (mp->max_address
8754 < minipool_barrier->address + mp->offset + fix->fix_size)
8756 /* Inserting before this entry would push the fix beyond
8757 its maximum address (which can happen if we have
8758 re-located a forwards fix); force the new fix to come
8759 after it. */
8760 min_mp = mp;
8761 min_address = mp->min_address + fix->fix_size;
8763 /* If we are inserting an 8-byte aligned quantity and
8764 we have not already found an insertion point, then
8765 make sure that all such 8-byte aligned quantities are
8766 placed at the start of the pool. */
8767 else if (ARM_DOUBLEWORD_ALIGN
8768 && min_mp == NULL
8769 && fix->fix_size >= 8
8770 && mp->fix_size < 8)
8772 min_mp = mp;
8773 min_address = mp->min_address + fix->fix_size;
8778 /* We need to create a new entry. */
8779 mp = XNEW (Mnode);
8780 mp->fix_size = fix->fix_size;
8781 mp->mode = fix->mode;
8782 mp->value = fix->value;
8783 mp->refcount = 1;
8784 mp->max_address = minipool_barrier->address + 65536;
8786 mp->min_address = min_address;
8788 if (min_mp == NULL)
8790 mp->prev = NULL;
8791 mp->next = minipool_vector_head;
8793 if (mp->next == NULL)
8795 minipool_vector_tail = mp;
8796 minipool_vector_label = gen_label_rtx ();
8798 else
8799 mp->next->prev = mp;
8801 minipool_vector_head = mp;
8803 else
8805 mp->next = min_mp->next;
8806 mp->prev = min_mp;
8807 min_mp->next = mp;
8809 if (mp->next != NULL)
8810 mp->next->prev = mp;
8811 else
8812 minipool_vector_tail = mp;
8815 /* Save the new entry. */
8816 min_mp = mp;
8818 if (mp->prev)
8819 mp = mp->prev;
8820 else
8821 mp->offset = 0;
8823 /* Scan over the following entries and adjust their offsets. */
8824 while (mp->next != NULL)
8826 if (mp->next->min_address < mp->min_address + mp->fix_size)
8827 mp->next->min_address = mp->min_address + mp->fix_size;
8829 if (mp->refcount)
8830 mp->next->offset = mp->offset + mp->fix_size;
8831 else
8832 mp->next->offset = mp->offset;
8834 mp = mp->next;
8837 return min_mp;
8840 static void
8841 assign_minipool_offsets (Mfix *barrier)
8843 HOST_WIDE_INT offset = 0;
8844 Mnode *mp;
8846 minipool_barrier = barrier;
8848 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8850 mp->offset = offset;
8852 if (mp->refcount > 0)
8853 offset += mp->fix_size;
8857 /* Output the literal table */
8858 static void
8859 dump_minipool (rtx scan)
8861 Mnode * mp;
8862 Mnode * nmp;
8863 int align64 = 0;
8865 if (ARM_DOUBLEWORD_ALIGN)
8866 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8867 if (mp->refcount > 0 && mp->fix_size >= 8)
8869 align64 = 1;
8870 break;
8873 if (dump_file)
8874 fprintf (dump_file,
8875 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8876 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8878 scan = emit_label_after (gen_label_rtx (), scan);
8879 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8880 scan = emit_label_after (minipool_vector_label, scan);
8882 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8884 if (mp->refcount > 0)
8886 if (dump_file)
8888 fprintf (dump_file,
8889 ";; Offset %u, min %ld, max %ld ",
8890 (unsigned) mp->offset, (unsigned long) mp->min_address,
8891 (unsigned long) mp->max_address);
8892 arm_print_value (dump_file, mp->value);
8893 fputc ('\n', dump_file);
8896 switch (mp->fix_size)
8898 #ifdef HAVE_consttable_1
8899 case 1:
8900 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8901 break;
8903 #endif
8904 #ifdef HAVE_consttable_2
8905 case 2:
8906 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8907 break;
8909 #endif
8910 #ifdef HAVE_consttable_4
8911 case 4:
8912 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8913 break;
8915 #endif
8916 #ifdef HAVE_consttable_8
8917 case 8:
8918 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8919 break;
8921 #endif
8922 #ifdef HAVE_consttable_16
8923 case 16:
8924 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8925 break;
8927 #endif
8928 default:
8929 gcc_unreachable ();
8933 nmp = mp->next;
8934 free (mp);
8937 minipool_vector_head = minipool_vector_tail = NULL;
8938 scan = emit_insn_after (gen_consttable_end (), scan);
8939 scan = emit_barrier_after (scan);
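/* The pool emitted above therefore looks roughly like this in the insn
   stream (a sketch):
       (new label)
       align to 4 bytes (or 8 when any live entry needs doubleword alignment)
       minipool_vector_label:
       consttable_1/2/4/8/16 entries, in pool order
       consttable_end
       barrier
   Entries whose refcount dropped to zero are skipped but still freed.  */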
8942 /* Return the cost of forcibly inserting a barrier after INSN. */
8943 static int
8944 arm_barrier_cost (rtx insn)
8946 /* Basing the location of the pool on the loop depth is preferable,
8947 but at the moment, the basic block information seems to be
8948 corrupted by this stage of the compilation. */
8949 int base_cost = 50;
8950 rtx next = next_nonnote_insn (insn);
8952 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8953 base_cost -= 20;
8955 switch (GET_CODE (insn))
8957 case CODE_LABEL:
8958 /* It will always be better to place the table before the label, rather
8959 than after it. */
8960 return 50;
8962 case INSN:
8963 case CALL_INSN:
8964 return base_cost;
8966 case JUMP_INSN:
8967 return base_cost - 10;
8969 default:
8970 return base_cost + 10;
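/* Example costs from the rules above: an ordinary INSN or CALL_INSN costs
   50, or 30 when it is immediately followed by a CODE_LABEL; a JUMP_INSN
   costs 40 (30 when followed by a label).  Since create_fix_barrier keeps
   the lowest-cost location seen so far, pools tend to be placed after
   existing branches and just before labels.  */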
8974 /* Find the best place in the insn stream in the range
8975 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8976 Create the barrier by inserting a jump and add a new fix entry for
8977 it. */
8978 static Mfix *
8979 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8981 HOST_WIDE_INT count = 0;
8982 rtx barrier;
8983 rtx from = fix->insn;
8984 /* The instruction after which we will insert the jump. */
8985 rtx selected = NULL;
8986 int selected_cost;
8987 /* The address at which the jump instruction will be placed. */
8988 HOST_WIDE_INT selected_address;
8989 Mfix * new_fix;
8990 HOST_WIDE_INT max_count = max_address - fix->address;
8991 rtx label = gen_label_rtx ();
8993 selected_cost = arm_barrier_cost (from);
8994 selected_address = fix->address;
8996 while (from && count < max_count)
8998 rtx tmp;
8999 int new_cost;
9001 /* This code shouldn't have been called if there was a natural barrier
9002 within range. */
9003 gcc_assert (GET_CODE (from) != BARRIER);
9005 /* Count the length of this insn. */
9006 count += get_attr_length (from);
9008 /* If there is a jump table, add its length. */
9009 tmp = is_jump_table (from);
9010 if (tmp != NULL)
9012 count += get_jump_table_size (tmp);
9014 /* Jump tables aren't in a basic block, so base the cost on
9015 the dispatch insn. If we select this location, we will
9016 still put the pool after the table. */
9017 new_cost = arm_barrier_cost (from);
9019 if (count < max_count
9020 && (!selected || new_cost <= selected_cost))
9022 selected = tmp;
9023 selected_cost = new_cost;
9024 selected_address = fix->address + count;
9027 /* Continue after the dispatch table. */
9028 from = NEXT_INSN (tmp);
9029 continue;
9032 new_cost = arm_barrier_cost (from);
9034 if (count < max_count
9035 && (!selected || new_cost <= selected_cost))
9037 selected = from;
9038 selected_cost = new_cost;
9039 selected_address = fix->address + count;
9042 from = NEXT_INSN (from);
9045 /* Make sure that we found a place to insert the jump. */
9046 gcc_assert (selected);
9048 /* Create a new JUMP_INSN that branches around a barrier. */
9049 from = emit_jump_insn_after (gen_jump (label), selected);
9050 JUMP_LABEL (from) = label;
9051 barrier = emit_barrier_after (from);
9052 emit_label_after (label, barrier);
9054 /* Create a minipool barrier entry for the new barrier. */
9055 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9056 new_fix->insn = barrier;
9057 new_fix->address = selected_address;
9058 new_fix->next = fix->next;
9059 fix->next = new_fix;
9061 return new_fix;
9064 /* Record that there is a natural barrier in the insn stream at
9065 ADDRESS. */
9066 static void
9067 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9069 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9071 fix->insn = insn;
9072 fix->address = address;
9074 fix->next = NULL;
9075 if (minipool_fix_head != NULL)
9076 minipool_fix_tail->next = fix;
9077 else
9078 minipool_fix_head = fix;
9080 minipool_fix_tail = fix;
9083 /* Record INSN, which will need fixing up to load a value from the
9084 minipool. ADDRESS is the offset of the insn since the start of the
9085 function; LOC is a pointer to the part of the insn which requires
9086 fixing; VALUE is the constant that must be loaded, which is of type
9087 MODE. */
9088 static void
9089 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9090 enum machine_mode mode, rtx value)
9092 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9094 fix->insn = insn;
9095 fix->address = address;
9096 fix->loc = loc;
9097 fix->mode = mode;
9098 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9099 fix->value = value;
9100 fix->forwards = get_attr_pool_range (insn);
9101 fix->backwards = get_attr_neg_pool_range (insn);
9102 fix->minipool = NULL;
9104 /* If an insn doesn't have a range defined for it, then it isn't
9105 expecting to be reworked by this code. Better to stop now than
9106 to generate duff assembly code. */
9107 gcc_assert (fix->forwards || fix->backwards);
9109 /* If an entry requires 8-byte alignment then assume all constant pools
9110 require 4 bytes of padding. Trying to do this later on a per-pool
9111 basis is awkward because existing pool entries have to be modified. */
9112 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9113 minipool_pad = 4;
9115 if (dump_file)
9117 fprintf (dump_file,
9118 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9119 GET_MODE_NAME (mode),
9120 INSN_UID (insn), (unsigned long) address,
9121 -1 * (long)fix->backwards, (long)fix->forwards);
9122 arm_print_value (dump_file, fix->value);
9123 fprintf (dump_file, "\n");
9126 /* Add it to the chain of fixes. */
9127 fix->next = NULL;
9129 if (minipool_fix_head != NULL)
9130 minipool_fix_tail->next = fix;
9131 else
9132 minipool_fix_head = fix;
9134 minipool_fix_tail = fix;
9137 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9138 Returns the number of insns needed, or 99 if we don't know how to
9139 do it. */
9141 arm_const_double_inline_cost (rtx val)
9143 rtx lowpart, highpart;
9144 enum machine_mode mode;
9146 mode = GET_MODE (val);
9148 if (mode == VOIDmode)
9149 mode = DImode;
9151 gcc_assert (GET_MODE_SIZE (mode) == 8);
9153 lowpart = gen_lowpart (SImode, val);
9154 highpart = gen_highpart_mode (SImode, mode, val);
9156 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9157 gcc_assert (GET_CODE (highpart) == CONST_INT);
9159 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9160 NULL_RTX, NULL_RTX, 0, 0)
9161 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9162 NULL_RTX, NULL_RTX, 0, 0));
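/* For instance (a sketch), the DImode constant 0x0000000500000001 splits
   into lowpart 1 and highpart 5, each of which arm_gen_constant can build
   with a single mov, giving an inline cost of 2; halves that each need a
   mov/orr pair would give a cost of 4.  */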
9165 /* Return true if it is worthwhile to split a 64-bit constant into two
9166 32-bit operations. This is the case if optimizing for size, or
9167 if we have load delay slots, or if one 32-bit part can be done with
9168 a single data operation. */
9169 bool
9170 arm_const_double_by_parts (rtx val)
9172 enum machine_mode mode = GET_MODE (val);
9173 rtx part;
9175 if (optimize_size || arm_ld_sched)
9176 return true;
9178 if (mode == VOIDmode)
9179 mode = DImode;
9181 part = gen_highpart_mode (SImode, mode, val);
9183 gcc_assert (GET_CODE (part) == CONST_INT);
9185 if (const_ok_for_arm (INTVAL (part))
9186 || const_ok_for_arm (~INTVAL (part)))
9187 return true;
9189 part = gen_lowpart (SImode, val);
9191 gcc_assert (GET_CODE (part) == CONST_INT);
9193 if (const_ok_for_arm (INTVAL (part))
9194 || const_ok_for_arm (~INTVAL (part)))
9195 return true;
9197 return false;
9200 /* Scan INSN and note any of its operands that need fixing.
9201 If DO_PUSHES is false we do not actually push any of the fixups
9202 needed. The function returns TRUE if any fixups were needed/pushed.
9203 This is used by arm_memory_load_p() which needs to know about loads
9204 of constants that will be converted into minipool loads. */
9205 static bool
9206 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9208 bool result = false;
9209 int opno;
9211 extract_insn (insn);
9213 if (!constrain_operands (1))
9214 fatal_insn_not_found (insn);
9216 if (recog_data.n_alternatives == 0)
9217 return false;
9219 /* Fill in recog_op_alt with information about the constraints of
9220 this insn. */
9221 preprocess_constraints ();
9223 for (opno = 0; opno < recog_data.n_operands; opno++)
9225 /* Things we need to fix can only occur in inputs. */
9226 if (recog_data.operand_type[opno] != OP_IN)
9227 continue;
9229 /* If this alternative is a memory reference, then any mention
9230 of constants in this alternative is really to fool reload
9231 into allowing us to accept one there. We need to fix them up
9232 now so that we output the right code. */
9233 if (recog_op_alt[opno][which_alternative].memory_ok)
9235 rtx op = recog_data.operand[opno];
9237 if (CONSTANT_P (op))
9239 if (do_pushes)
9240 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9241 recog_data.operand_mode[opno], op);
9242 result = true;
9244 else if (GET_CODE (op) == MEM
9245 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9246 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9248 if (do_pushes)
9250 rtx cop = avoid_constant_pool_reference (op);
9252 /* Casting the address of something to a mode narrower
9253 than a word can cause avoid_constant_pool_reference()
9254 to return the pool reference itself. That's no good to
9255 us here. Let's just hope that we can use the
9256 constant pool value directly. */
9257 if (op == cop)
9258 cop = get_pool_constant (XEXP (op, 0));
9260 push_minipool_fix (insn, address,
9261 recog_data.operand_loc[opno],
9262 recog_data.operand_mode[opno], cop);
9265 result = true;
9270 return result;
9273 /* Gcc puts the pool in the wrong place for ARM, since we can only
9274 load addresses a limited distance around the pc. We do some
9275 special munging to move the constant pool values to the correct
9276 point in the code. */
9277 static void
9278 arm_reorg (void)
9280 rtx insn;
9281 HOST_WIDE_INT address = 0;
9282 Mfix * fix;
9284 minipool_fix_head = minipool_fix_tail = NULL;
9286 /* The first insn must always be a note, or the code below won't
9287 scan it properly. */
9288 insn = get_insns ();
9289 gcc_assert (GET_CODE (insn) == NOTE);
9290 minipool_pad = 0;
9292 /* Scan all the insns and record the operands that will need fixing. */
9293 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9295 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9296 && (arm_cirrus_insn_p (insn)
9297 || GET_CODE (insn) == JUMP_INSN
9298 || arm_memory_load_p (insn)))
9299 cirrus_reorg (insn);
9301 if (GET_CODE (insn) == BARRIER)
9302 push_minipool_barrier (insn, address);
9303 else if (INSN_P (insn))
9305 rtx table;
9307 note_invalid_constants (insn, address, true);
9308 address += get_attr_length (insn);
9310 /* If the insn is a vector jump, add the size of the table
9311 and skip the table. */
9312 if ((table = is_jump_table (insn)) != NULL)
9314 address += get_jump_table_size (table);
9315 insn = table;
9320 fix = minipool_fix_head;
9322 /* Now scan the fixups and perform the required changes. */
9323 while (fix)
9325 Mfix * ftmp;
9326 Mfix * fdel;
9327 Mfix * last_added_fix;
9328 Mfix * last_barrier = NULL;
9329 Mfix * this_fix;
9331 /* Skip any further barriers before the next fix. */
9332 while (fix && GET_CODE (fix->insn) == BARRIER)
9333 fix = fix->next;
9335 /* No more fixes. */
9336 if (fix == NULL)
9337 break;
9339 last_added_fix = NULL;
9341 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9343 if (GET_CODE (ftmp->insn) == BARRIER)
9345 if (ftmp->address >= minipool_vector_head->max_address)
9346 break;
9348 last_barrier = ftmp;
9350 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9351 break;
9353 last_added_fix = ftmp; /* Keep track of the last fix added. */
9356 /* If we found a barrier, drop back to that; any fixes that we
9357 could have reached but come after the barrier will now go in
9358 the next mini-pool. */
9359 if (last_barrier != NULL)
9361 /* Reduce the refcount for those fixes that won't go into this
9362 pool after all. */
9363 for (fdel = last_barrier->next;
9364 fdel && fdel != ftmp;
9365 fdel = fdel->next)
9367 fdel->minipool->refcount--;
9368 fdel->minipool = NULL;
9371 ftmp = last_barrier;
9373 else
9375 /* ftmp is the first fix that we can't fit into this pool, and
9376 there are no natural barriers that we could use. Insert a
9377 new barrier in the code somewhere between the previous
9378 fix and this one, and arrange to jump around it. */
9379 HOST_WIDE_INT max_address;
9381 /* The last item on the list of fixes must be a barrier, so
9382 we can never run off the end of the list of fixes without
9383 last_barrier being set. */
9384 gcc_assert (ftmp);
9386 max_address = minipool_vector_head->max_address;
9387 /* Check that there isn't another fix that is in range that
9388 we couldn't fit into this pool because the pool was
9389 already too large: we need to put the pool before such an
9390 instruction. The pool itself may come just after the
9391 fix because create_fix_barrier also allows space for a
9392 jump instruction. */
9393 if (ftmp->address < max_address)
9394 max_address = ftmp->address + 1;
9396 last_barrier = create_fix_barrier (last_added_fix, max_address);
9399 assign_minipool_offsets (last_barrier);
9401 while (ftmp)
9403 if (GET_CODE (ftmp->insn) != BARRIER
9404 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9405 == NULL))
9406 break;
9408 ftmp = ftmp->next;
9411 /* Scan over the fixes we have identified for this pool, fixing them
9412 up and adding the constants to the pool itself. */
9413 for (this_fix = fix; this_fix && ftmp != this_fix;
9414 this_fix = this_fix->next)
9415 if (GET_CODE (this_fix->insn) != BARRIER)
9417 rtx addr
9418 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9419 minipool_vector_label),
9420 this_fix->minipool->offset);
9421 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9424 dump_minipool (last_barrier->insn);
9425 fix = ftmp;
9428 /* From now on we must synthesize any constants that we can't handle
9429 directly. This can happen if the RTL gets split during final
9430 instruction generation. */
9431 after_arm_reorg = 1;
9433 /* Free the minipool memory. */
9434 obstack_free (&minipool_obstack, minipool_startobj);
9437 /* Routines to output assembly language. */
9439 /* If the rtx is the correct value then return the string representation of
9440 the number. In this way we can ensure that valid double constants are
9441 generated even when cross-compiling. */
9442 const char *
9443 fp_immediate_constant (rtx x)
9445 REAL_VALUE_TYPE r;
9446 int i;
9448 if (!fp_consts_inited)
9449 init_fp_table ();
9451 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9452 for (i = 0; i < 8; i++)
9453 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9454 return strings_fp[i];
9456 gcc_unreachable ();
9459 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9460 static const char *
9461 fp_const_from_val (REAL_VALUE_TYPE *r)
9463 int i;
9465 if (!fp_consts_inited)
9466 init_fp_table ();
9468 for (i = 0; i < 8; i++)
9469 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9470 return strings_fp[i];
9472 gcc_unreachable ();
9475 /* Output the operands of a LDM/STM instruction to STREAM.
9476 MASK is the ARM register set mask of which only bits 0-15 are important.
9477 REG is the base register, either the frame pointer or the stack pointer.
9478 INSTR is the possibly suffixed load or store instruction.
9479 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9481 static void
9482 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9483 unsigned long mask, int rfe)
9485 unsigned i;
9486 bool not_first = FALSE;
9488 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9489 fputc ('\t', stream);
9490 asm_fprintf (stream, instr, reg);
9491 fputc ('{', stream);
9493 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9494 if (mask & (1 << i))
9496 if (not_first)
9497 fprintf (stream, ", ");
9499 asm_fprintf (stream, "%r", i);
9500 not_first = TRUE;
9503 if (rfe)
9504 fprintf (stream, "}^\n");
9505 else
9506 fprintf (stream, "}\n");
9510 /* Output a FLDMD instruction to STREAM.
9511 BASE is the register containing the address.
9512 REG and COUNT specify the register range.
9513 Extra registers may be added to avoid hardware bugs.
9515 We output FLDMD even for ARMv5 VFP implementations. Although
9516 FLDMD is technically not supported until ARMv6, it is believed
9517 that all VFP implementations support its use in this context. */
9519 static void
9520 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9522 int i;
9524 /* Workaround ARM10 VFPr1 bug. */
9525 if (count == 2 && !arm_arch6)
9527 if (reg == 15)
9528 reg--;
9529 count++;
9532 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9533 load into multiple parts if we have to handle more than 16 registers. */
9534 if (count > 16)
9536 vfp_output_fldmd (stream, base, reg, 16);
9537 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9538 return;
9541 fputc ('\t', stream);
9542 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9544 for (i = reg; i < reg + count; i++)
9546 if (i > reg)
9547 fputs (", ", stream);
9548 asm_fprintf (stream, "d%d", i);
9550 fputs ("}\n", stream);
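/* As a usage sketch, vfp_output_fldmd (f, SP_REGNUM, 8, 3) would print
   roughly:
       fldmfdd sp!, {d8, d9, d10}
   and on pre-ARMv6 cores a request for exactly two registers is widened to
   three to step around the ARM10 VFPr1 erratum handled above.  */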
9555 /* Output the assembly for a store multiple. */
9557 const char *
9558 vfp_output_fstmd (rtx * operands)
9560 char pattern[100];
9561 int p;
9562 int base;
9563 int i;
9565 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9566 p = strlen (pattern);
9568 gcc_assert (GET_CODE (operands[1]) == REG);
9570 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9571 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9573 p += sprintf (&pattern[p], ", d%d", base + i);
9575 strcpy (&pattern[p], "}");
9577 output_asm_insn (pattern, operands);
9578 return "";
9582 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
9583 number of bytes pushed. */
9585 static int
9586 vfp_emit_fstmd (int base_reg, int count)
9588 rtx par;
9589 rtx dwarf;
9590 rtx tmp, reg;
9591 int i;
9593 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9594 register pairs are stored by a store multiple insn. We avoid this
9595 by pushing an extra pair. */
9596 if (count == 2 && !arm_arch6)
9598 if (base_reg == LAST_VFP_REGNUM - 3)
9599 base_reg -= 2;
9600 count++;
9603 /* FSTMD may not store more than 16 doubleword registers at once. Split
9604 larger stores into multiple parts (up to a maximum of two, in
9605 practice). */
9606 if (count > 16)
9608 int saved;
9609 /* NOTE: base_reg is an internal register number, so each D register
9610 counts as 2. */
9611 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9612 saved += vfp_emit_fstmd (base_reg, 16);
9613 return saved;
9616 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9617 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9619 reg = gen_rtx_REG (DFmode, base_reg);
9620 base_reg += 2;
9622 XVECEXP (par, 0, 0)
9623 = gen_rtx_SET (VOIDmode,
9624 gen_frame_mem (BLKmode,
9625 gen_rtx_PRE_DEC (BLKmode,
9626 stack_pointer_rtx)),
9627 gen_rtx_UNSPEC (BLKmode,
9628 gen_rtvec (1, reg),
9629 UNSPEC_PUSH_MULT));
9631 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9632 plus_constant (stack_pointer_rtx, -(count * 8)));
9633 RTX_FRAME_RELATED_P (tmp) = 1;
9634 XVECEXP (dwarf, 0, 0) = tmp;
9636 tmp = gen_rtx_SET (VOIDmode,
9637 gen_frame_mem (DFmode, stack_pointer_rtx),
9638 reg);
9639 RTX_FRAME_RELATED_P (tmp) = 1;
9640 XVECEXP (dwarf, 0, 1) = tmp;
9642 for (i = 1; i < count; i++)
9644 reg = gen_rtx_REG (DFmode, base_reg);
9645 base_reg += 2;
9646 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9648 tmp = gen_rtx_SET (VOIDmode,
9649 gen_frame_mem (DFmode,
9650 plus_constant (stack_pointer_rtx,
9651 i * 8)),
9652 reg);
9653 RTX_FRAME_RELATED_P (tmp) = 1;
9654 XVECEXP (dwarf, 0, i + 1) = tmp;
9657 par = emit_insn (par);
9658 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9659 REG_NOTES (par));
9660 RTX_FRAME_RELATED_P (par) = 1;
9662 return count * 8;
9665 /* Emit a call instruction with pattern PAT. ADDR is the address of
9666 the call target. */
9668 void
9669 arm_emit_call_insn (rtx pat, rtx addr)
9671 rtx insn;
9673 insn = emit_call_insn (pat);
9675 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9676 If the call might use such an entry, add a use of the PIC register
9677 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9678 if (TARGET_VXWORKS_RTP
9679 && flag_pic
9680 && GET_CODE (addr) == SYMBOL_REF
9681 && (SYMBOL_REF_DECL (addr)
9682 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9683 : !SYMBOL_REF_LOCAL_P (addr)))
9685 require_pic_register ();
9686 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9690 /* Output a 'call' insn. */
9691 const char *
9692 output_call (rtx *operands)
9694 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9696 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9697 if (REGNO (operands[0]) == LR_REGNUM)
9699 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9700 output_asm_insn ("mov%?\t%0, %|lr", operands);
9703 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9705 if (TARGET_INTERWORK || arm_arch4t)
9706 output_asm_insn ("bx%?\t%0", operands);
9707 else
9708 output_asm_insn ("mov%?\t%|pc, %0", operands);
9710 return "";
9713 /* Output a 'call' insn that is a reference in memory. */
9714 const char *
9715 output_call_mem (rtx *operands)
9717 if (TARGET_INTERWORK && !arm_arch5)
9719 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9720 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9721 output_asm_insn ("bx%?\t%|ip", operands);
9723 else if (regno_use_in (LR_REGNUM, operands[0]))
9725 /* LR is used in the memory address. We load the address in the
9726 first instruction. It's safe to use IP as the target of the
9727 load since the call will kill it anyway. */
9728 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9729 if (arm_arch5)
9730 output_asm_insn ("blx%?\t%|ip", operands);
9731 else
9733 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9734 if (arm_arch4t)
9735 output_asm_insn ("bx%?\t%|ip", operands);
9736 else
9737 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9740 else
9742 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9743 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9746 return "";
9750 /* Output a move from arm registers to an fpa register.
9751 OPERANDS[0] is an fpa register.
9752 OPERANDS[1] is the first register of an arm register pair. */
9753 const char *
9754 output_mov_long_double_fpa_from_arm (rtx *operands)
9756 int arm_reg0 = REGNO (operands[1]);
9757 rtx ops[3];
9759 gcc_assert (arm_reg0 != IP_REGNUM);
9761 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9762 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9763 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9765 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9766 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9768 return "";
9771 /* Output a move from an fpa register to arm registers.
9772 OPERANDS[0] is the first register of an arm register pair.
9773 OPERANDS[1] is an fpa register. */
9774 const char *
9775 output_mov_long_double_arm_from_fpa (rtx *operands)
9777 int arm_reg0 = REGNO (operands[0]);
9778 rtx ops[3];
9780 gcc_assert (arm_reg0 != IP_REGNUM);
9782 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9783 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9784 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9786 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9787 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9788 return "";
9791 /* Output a move from arm registers to arm registers of a long double.
9792 OPERANDS[0] is the destination.
9793 OPERANDS[1] is the source. */
9794 const char *
9795 output_mov_long_double_arm_from_arm (rtx *operands)
9797 /* We have to be careful here because the two might overlap. */
9798 int dest_start = REGNO (operands[0]);
9799 int src_start = REGNO (operands[1]);
9800 rtx ops[2];
9801 int i;
9803 if (dest_start < src_start)
9805 for (i = 0; i < 3; i++)
9807 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9808 ops[1] = gen_rtx_REG (SImode, src_start + i);
9809 output_asm_insn ("mov%?\t%0, %1", ops);
9812 else
9814 for (i = 2; i >= 0; i--)
9816 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9817 ops[1] = gen_rtx_REG (SImode, src_start + i);
9818 output_asm_insn ("mov%?\t%0, %1", ops);
9822 return "";
9826 /* Output a move from arm registers to an fpa register.
9827 OPERANDS[0] is an fpa register.
9828 OPERANDS[1] is the first register of an arm register pair. */
9829 const char *
9830 output_mov_double_fpa_from_arm (rtx *operands)
9832 int arm_reg0 = REGNO (operands[1]);
9833 rtx ops[2];
9835 gcc_assert (arm_reg0 != IP_REGNUM);
9837 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9838 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9839 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9840 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9841 return "";
9844 /* Output a move from an fpa register to arm registers.
9845 OPERANDS[0] is the first register of an arm register pair.
9846 OPERANDS[1] is an fpa register. */
9847 const char *
9848 output_mov_double_arm_from_fpa (rtx *operands)
9850 int arm_reg0 = REGNO (operands[0]);
9851 rtx ops[2];
9853 gcc_assert (arm_reg0 != IP_REGNUM);
9855 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9856 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9857 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9858 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9859 return "";
9862 /* Output a move between double words.
9863 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9864 or MEM<-REG, and all MEMs must be offsettable addresses. */
9865 const char *
9866 output_move_double (rtx *operands)
9868 enum rtx_code code0 = GET_CODE (operands[0]);
9869 enum rtx_code code1 = GET_CODE (operands[1]);
9870 rtx otherops[3];
9872 if (code0 == REG)
9874 int reg0 = REGNO (operands[0]);
9876 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9878 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9880 switch (GET_CODE (XEXP (operands[1], 0)))
9882 case REG:
9883 if (TARGET_LDRD)
9884 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
9885 else
9886 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9887 break;
9889 case PRE_INC:
9890 gcc_assert (TARGET_LDRD);
9891 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9892 break;
9894 case PRE_DEC:
9895 if (TARGET_LDRD)
9896 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9897 else
9898 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9899 break;
9901 case POST_INC:
9902 if (TARGET_LDRD)
9903 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
9904 else
9905 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9906 break;
9908 case POST_DEC:
9909 gcc_assert (TARGET_LDRD);
9910 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9911 break;
9913 case PRE_MODIFY:
9914 case POST_MODIFY:
9915 otherops[0] = operands[0];
9916 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9917 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9919 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9921 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9923 /* Registers overlap so split out the increment. */
9924 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9925 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9927 else
9929 /* IWMMXT allows offsets larger than ldrd can handle;
9930 fix these up with a pair of ldr. */
9931 if (GET_CODE (otherops[2]) == CONST_INT
9932 && (INTVAL(otherops[2]) <= -256
9933 || INTVAL(otherops[2]) >= 256))
9935 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9936 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9937 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9939 else
9940 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9943 else
9945 /* IWMMXT allows offsets larger than ldrd can handle;
9946 fix these up with a pair of ldr. */
9947 if (GET_CODE (otherops[2]) == CONST_INT
9948 && (INTVAL(otherops[2]) <= -256
9949 || INTVAL(otherops[2]) >= 256))
9951 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9952 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9953 otherops[0] = operands[0];
9954 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9956 else
9957 /* We only allow constant increments, so this is safe. */
9958 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9960 break;
9962 case LABEL_REF:
9963 case CONST:
9964 /* We might be able to use ldrd %0, %1 here. However the range is
9965 different to ldr/adr, and it is broken on some ARMv7-M
9966 implementations. */
9967 output_asm_insn ("adr%?\t%0, %1", operands);
9968 if (TARGET_LDRD)
9969 output_asm_insn ("ldr%(d%)\t%0, [%0]", operands);
9970 else
9971 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9972 break;
9974 /* ??? This needs checking for thumb2. */
9975 default:
9976 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9977 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9979 otherops[0] = operands[0];
9980 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9981 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9983 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9985 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
9987 switch ((int) INTVAL (otherops[2]))
9989 case -8:
9990 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
9991 return "";
9992 case -4:
9993 if (TARGET_THUMB2)
9994 break;
9995 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
9996 return "";
9997 case 4:
9998 if (TARGET_THUMB2)
9999 break;
10000 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
10001 return "";
10004 if (TARGET_LDRD
10005 && (GET_CODE (otherops[2]) == REG
10006 || (GET_CODE (otherops[2]) == CONST_INT
10007 && INTVAL (otherops[2]) > -256
10008 && INTVAL (otherops[2]) < 256)))
10010 if (reg_overlap_mentioned_p (otherops[0],
10011 otherops[2]))
10013 /* Swap base and index registers over to
10014 avoid a conflict. */
10015 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
10016 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
10018 /* If both registers conflict, it will usually
10019 have been fixed by a splitter. */
10020 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
10022 output_asm_insn ("add%?\t%1, %1, %2", otherops);
10023 output_asm_insn ("ldr%(d%)\t%0, [%1]",
10024 otherops);
10026 else
10027 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
10028 return "";
10031 if (GET_CODE (otherops[2]) == CONST_INT)
10033 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10034 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10035 else
10036 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10038 else
10039 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10041 else
10042 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10044 if (TARGET_LDRD)
10045 return "ldr%(d%)\t%0, [%0]";
10047 return "ldm%(ia%)\t%0, %M0";
10049 else
10051 otherops[1] = adjust_address (operands[1], SImode, 4);
10052 /* Take care of overlapping base/data reg. */
10053 if (reg_mentioned_p (operands[0], operands[1]))
10055 output_asm_insn ("ldr%?\t%0, %1", otherops);
10056 output_asm_insn ("ldr%?\t%0, %1", operands);
10058 else
10060 output_asm_insn ("ldr%?\t%0, %1", operands);
10061 output_asm_insn ("ldr%?\t%0, %1", otherops);
10066 else
10068 /* Constraints should ensure this. */
10069 gcc_assert (code0 == MEM && code1 == REG);
10070 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10072 switch (GET_CODE (XEXP (operands[0], 0)))
10074 case REG:
10075 if (TARGET_LDRD)
10076 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
10077 else
10078 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10079 break;
10081 case PRE_INC:
10082 gcc_assert (TARGET_LDRD);
10083 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10084 break;
10086 case PRE_DEC:
10087 if (TARGET_LDRD)
10088 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10089 else
10090 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10091 break;
10093 case POST_INC:
10094 if (TARGET_LDRD)
10095 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
10096 else
10097 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10098 break;
10100 case POST_DEC:
10101 gcc_assert (TARGET_LDRD);
10102 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10103 break;
10105 case PRE_MODIFY:
10106 case POST_MODIFY:
10107 otherops[0] = operands[1];
10108 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10109 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10111 /* IWMMXT allows offsets larger than strd can handle,
10112 fix these up with a pair of str. */
10113 if (GET_CODE (otherops[2]) == CONST_INT
10114 && (INTVAL(otherops[2]) <= -256
10115 || INTVAL(otherops[2]) >= 256))
10117 rtx reg1;
10118 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10119 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10121 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
10122 otherops[0] = reg1;
10123 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10125 else
10127 otherops[0] = reg1;
10128 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10129 otherops[0] = operands[1];
10130 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
10133 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10134 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10135 else
10136 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10137 break;
10139 case PLUS:
10140 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10141 if (GET_CODE (otherops[2]) == CONST_INT && !TARGET_LDRD)
10143 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10145 case -8:
10146 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10147 return "";
10149 case -4:
10150 if (TARGET_THUMB2)
10151 break;
10152 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10153 return "";
10155 case 4:
10156 if (TARGET_THUMB2)
10157 break;
10158 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10159 return "";
10162 if (TARGET_LDRD
10163 && (GET_CODE (otherops[2]) == REG
10164 || (GET_CODE (otherops[2]) == CONST_INT
10165 && INTVAL (otherops[2]) > -256
10166 && INTVAL (otherops[2]) < 256)))
10168 otherops[0] = operands[1];
10169 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10170 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10171 return "";
10173 /* Fall through */
10175 default:
10176 otherops[0] = adjust_address (operands[0], SImode, 4);
10177 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10178 output_asm_insn ("str%?\t%1, %0", operands);
10179 output_asm_insn ("str%?\t%1, %0", otherops);
10183 return "";
10186 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10187 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10189 const char *
10190 output_move_quad (rtx *operands)
10192 if (REG_P (operands[0]))
10194 /* Load, or reg->reg move. */
10196 if (MEM_P (operands[1]))
10198 switch (GET_CODE (XEXP (operands[1], 0)))
10200 case REG:
10201 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10202 break;
10204 case LABEL_REF:
10205 case CONST:
10206 output_asm_insn ("adr%?\t%0, %1", operands);
10207 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10208 break;
10210 default:
10211 gcc_unreachable ();
10214 else
10216 rtx ops[2];
10217 int dest, src, i;
10219 gcc_assert (REG_P (operands[1]));
10221 dest = REGNO (operands[0]);
10222 src = REGNO (operands[1]);
10224 /* This seems pretty dumb, but hopefully GCC won't try to do it
10225 very often. */
10226 if (dest < src)
10227 for (i = 0; i < 4; i++)
10229 ops[0] = gen_rtx_REG (SImode, dest + i);
10230 ops[1] = gen_rtx_REG (SImode, src + i);
10231 output_asm_insn ("mov%?\t%0, %1", ops);
10233 else
10234 for (i = 3; i >= 0; i--)
10236 ops[0] = gen_rtx_REG (SImode, dest + i);
10237 ops[1] = gen_rtx_REG (SImode, src + i);
10238 output_asm_insn ("mov%?\t%0, %1", ops);
10242 else
10244 gcc_assert (MEM_P (operands[0]));
10245 gcc_assert (REG_P (operands[1]));
10246 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10248 switch (GET_CODE (XEXP (operands[0], 0)))
10250 case REG:
10251 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10252 break;
10254 default:
10255 gcc_unreachable ();
10259 return "";
10262 /* Output a VFP load or store instruction. */
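/* For illustration (register numbers are arbitrary examples): a DFmode
   load of d7 from [r0, #8] takes the default case below and comes out
   roughly as "fldd d7, [r0, #8]"; a PRE_DEC store produces the
   multiple-register form, e.g. "fstmdbd sp!, {d7}".  */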
10264 const char *
10265 output_move_vfp (rtx *operands)
10267 rtx reg, mem, addr, ops[2];
10268 int load = REG_P (operands[0]);
10269 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10270 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10271 const char *template;
10272 char buff[50];
10273 enum machine_mode mode;
10275 reg = operands[!load];
10276 mem = operands[load];
10278 mode = GET_MODE (reg);
10280 gcc_assert (REG_P (reg));
10281 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10282 gcc_assert (mode == SFmode
10283 || mode == DFmode
10284 || mode == SImode
10285 || mode == DImode
10286 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10287 gcc_assert (MEM_P (mem));
10289 addr = XEXP (mem, 0);
10291 switch (GET_CODE (addr))
10293 case PRE_DEC:
10294 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10295 ops[0] = XEXP (addr, 0);
10296 ops[1] = reg;
10297 break;
10299 case POST_INC:
10300 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10301 ops[0] = XEXP (addr, 0);
10302 ops[1] = reg;
10303 break;
10305 default:
10306 template = "f%s%c%%?\t%%%s0, %%1%s";
10307 ops[0] = reg;
10308 ops[1] = mem;
10309 break;
10312 sprintf (buff, template,
10313 load ? "ld" : "st",
10314 dp ? 'd' : 's',
10315 dp ? "P" : "",
10316 integer_p ? "\t%@ int" : "");
10317 output_asm_insn (buff, ops);
10319 return "";
10322 /* Output a Neon quad-word load or store, or a load or store for
10323 larger structure modes. We could also support post-modify forms using
10324 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10325 yet.
10326 WARNING: The ordering of elements in memory is weird in big-endian mode,
10327 because we use VSTM instead of VST1, to make it easy to make vector stores
10328 via ARM registers write values in the same order as stores direct from Neon
10329 registers. For example, the byte ordering of a quadword vector with 16-bit
10330 elements like this:
10332 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10334 will be (with lowest address first, h = most-significant byte,
10335 l = least-significant byte of element):
10337 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10338 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10340 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10341 rN in the order:
10343 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10345 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10346 layout will result as if VSTM/VLDM were used. */
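/* For illustration (register numbers are arbitrary examples): storing
   the quad register q0 (d0/d1) to the address in r2 takes the default
   case below and emits roughly "vstmia r2, {d0-d1}"; the POST_INC case
   adds writeback, e.g. "vldmia r2!, {d0-d1}".  */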
10348 const char *
10349 output_move_neon (rtx *operands)
10351 rtx reg, mem, addr, ops[2];
10352 int regno, load = REG_P (operands[0]);
10353 const char *template;
10354 char buff[50];
10355 enum machine_mode mode;
10357 reg = operands[!load];
10358 mem = operands[load];
10360 mode = GET_MODE (reg);
10362 gcc_assert (REG_P (reg));
10363 regno = REGNO (reg);
10364 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10365 || NEON_REGNO_OK_FOR_QUAD (regno));
10366 gcc_assert (VALID_NEON_DREG_MODE (mode)
10367 || VALID_NEON_QREG_MODE (mode)
10368 || VALID_NEON_STRUCT_MODE (mode));
10369 gcc_assert (MEM_P (mem));
10371 addr = XEXP (mem, 0);
10373 /* Strip off const from addresses like (const (plus (...))). */
10374 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10375 addr = XEXP (addr, 0);
10377 switch (GET_CODE (addr))
10379 case POST_INC:
10380 template = "v%smia%%?\t%%0!, %%h1";
10381 ops[0] = XEXP (addr, 0);
10382 ops[1] = reg;
10383 break;
10385 case POST_MODIFY:
10386 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10387 gcc_unreachable ();
10389 case LABEL_REF:
10390 case PLUS:
10392 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10393 int i;
10394 int overlap = -1;
10395 for (i = 0; i < nregs; i++)
10397 /* We're only using DImode here because it's a convenient size. */
10398 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10399 ops[1] = adjust_address (mem, SImode, 8 * i);
10400 if (reg_overlap_mentioned_p (ops[0], mem))
10402 gcc_assert (overlap == -1);
10403 overlap = i;
10405 else
10407 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10408 output_asm_insn (buff, ops);
10411 if (overlap != -1)
10413 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10414 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10415 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10416 output_asm_insn (buff, ops);
10419 return "";
10422 default:
10423 template = "v%smia%%?\t%%m0, %%h1";
10424 ops[0] = mem;
10425 ops[1] = reg;
10428 sprintf (buff, template, load ? "ld" : "st");
10429 output_asm_insn (buff, ops);
10431 return "";
10434 /* Output an ADD r, s, #n where n may be too big for one instruction.
10435 If adding zero to one register, output nothing. */
10436 const char *
10437 output_add_immediate (rtx *operands)
10439 HOST_WIDE_INT n = INTVAL (operands[2]);
10441 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10443 if (n < 0)
10444 output_multi_immediate (operands,
10445 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10446 -n);
10447 else
10448 output_multi_immediate (operands,
10449 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10453 return "";
10456 /* Output a multiple immediate operation.
10457 OPERANDS is the vector of operands referred to in the output patterns.
10458 INSTR1 is the output pattern to use for the first constant.
10459 INSTR2 is the output pattern to use for subsequent constants.
10460 IMMED_OP is the index of the constant slot in OPERANDS.
10461 N is the constant value. */
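/* For illustration: ARM data-processing immediates are an 8-bit value
   rotated right by an even amount, so a constant such as 0x101 cannot
   be encoded in a single instruction.  When called from
   output_add_immediate above with operands r0, r1 and 0x101 (arbitrary
   example registers), the loop below peels the constant into 8-bit
   chunks at even bit positions and emits roughly
       add r0, r1, #1
       add r0, r0, #256  */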
10462 static const char *
10463 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10464 int immed_op, HOST_WIDE_INT n)
10466 #if HOST_BITS_PER_WIDE_INT > 32
10467 n &= 0xffffffff;
10468 #endif
10470 if (n == 0)
10472 /* Quick and easy output. */
10473 operands[immed_op] = const0_rtx;
10474 output_asm_insn (instr1, operands);
10476 else
10478 int i;
10479 const char * instr = instr1;
10481 /* Note that n is never zero here (which would give no output). */
10482 for (i = 0; i < 32; i += 2)
10484 if (n & (3 << i))
10486 operands[immed_op] = GEN_INT (n & (255 << i));
10487 output_asm_insn (instr, operands);
10488 instr = instr2;
10489 i += 6;
10494 return "";
10497 /* Return the name of a shifter operation. */
10498 static const char *
10499 arm_shift_nmem(enum rtx_code code)
10501 switch (code)
10503 case ASHIFT:
10504 return ARM_LSL_NAME;
10506 case ASHIFTRT:
10507 return "asr";
10509 case LSHIFTRT:
10510 return "lsr";
10512 case ROTATERT:
10513 return "ror";
10515 default:
10516 abort();
10520 /* Return the appropriate ARM instruction for the operation code.
10521 The returned result should not be overwritten. OP is the rtx of the
10522 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10523 was shifted. */
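/* For illustration: for (plus ...) this returns "add"; for (minus ...)
   it returns "sub" normally but "rsb" when SHIFT_FIRST_ARG is true,
   since only the second operand of an ARM data-processing instruction
   can carry the shift, so the operands have been swapped.  */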
10524 const char *
10525 arithmetic_instr (rtx op, int shift_first_arg)
10527 switch (GET_CODE (op))
10529 case PLUS:
10530 return "add";
10532 case MINUS:
10533 return shift_first_arg ? "rsb" : "sub";
10535 case IOR:
10536 return "orr";
10538 case XOR:
10539 return "eor";
10541 case AND:
10542 return "and";
10544 case ASHIFT:
10545 case ASHIFTRT:
10546 case LSHIFTRT:
10547 case ROTATERT:
10548 return arm_shift_nmem(GET_CODE(op));
10550 default:
10551 gcc_unreachable ();
10555 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10556 for the operation code. The returned result should not be overwritten.
10557 OP is the rtx code of the shift.
10558 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
10559 constant amount if the shift is by a constant. */
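/* For illustration: for (ashiftrt x (const_int 5)) this returns "asr"
   with *AMOUNTP set to 5, while for (mult x (const_int 8)) it returns
   ARM_LSL_NAME with *AMOUNTP set to int_log2 (8) == 3, i.e. a left
   shift by 3.  */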
10560 static const char *
10561 shift_op (rtx op, HOST_WIDE_INT *amountp)
10563 const char * mnem;
10564 enum rtx_code code = GET_CODE (op);
10566 switch (GET_CODE (XEXP (op, 1)))
10568 case REG:
10569 case SUBREG:
10570 *amountp = -1;
10571 break;
10573 case CONST_INT:
10574 *amountp = INTVAL (XEXP (op, 1));
10575 break;
10577 default:
10578 gcc_unreachable ();
10581 switch (code)
10583 case ROTATE:
10584 gcc_assert (*amountp != -1);
10585 *amountp = 32 - *amountp;
10586 code = ROTATERT;
10588 /* Fall through. */
10590 case ASHIFT:
10591 case ASHIFTRT:
10592 case LSHIFTRT:
10593 case ROTATERT:
10594 mnem = arm_shift_nmem(code);
10595 break;
10597 case MULT:
10598 /* We never have to worry about the amount being other than a
10599 power of 2, since this case can never be reloaded from a reg. */
10600 gcc_assert (*amountp != -1);
10601 *amountp = int_log2 (*amountp);
10602 return ARM_LSL_NAME;
10604 default:
10605 gcc_unreachable ();
10608 if (*amountp != -1)
10610 /* This is not 100% correct, but follows from the desire to merge
10611 multiplication by a power of 2 with the recognizer for a
10612 shift. >=32 is not a valid shift for "lsl", so we must try and
10613 output a shift that produces the correct arithmetical result.
10614 Using lsr #32 is identical except for the fact that the carry bit
10615 is not set correctly if we set the flags; but we never use the
10616 carry bit from such an operation, so we can ignore that. */
10617 if (code == ROTATERT)
10618 /* Rotate is just modulo 32. */
10619 *amountp &= 31;
10620 else if (*amountp != (*amountp & 31))
10622 if (code == ASHIFT)
10623 mnem = "lsr";
10624 *amountp = 32;
10627 /* Shifts of 0 are no-ops. */
10628 if (*amountp == 0)
10629 return NULL;
10632 return mnem;
10635 /* Obtain the shift from the POWER of two. */
10637 static HOST_WIDE_INT
10638 int_log2 (HOST_WIDE_INT power)
10640 HOST_WIDE_INT shift = 0;
10642 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10644 gcc_assert (shift <= 31);
10645 shift++;
10648 return shift;
10651 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10652 because /bin/as is horribly restrictive. The judgement about
10653 whether or not each character is 'printable' (and can be output as
10654 is) or not (and must be printed with an octal escape) must be made
10655 with reference to the *host* character set -- the situation is
10656 similar to that discussed in the comments above pp_c_char in
10657 c-pretty-print.c. */
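/* For illustration: the three characters 'a', '"' and '\n' are emitted
   as
       .ascii "a\"\012"
   and anything producing more than MAX_ASCII_LEN output characters is
   split across several .ascii directives.  */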
10659 #define MAX_ASCII_LEN 51
10661 void
10662 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10664 int i;
10665 int len_so_far = 0;
10667 fputs ("\t.ascii\t\"", stream);
10669 for (i = 0; i < len; i++)
10671 int c = p[i];
10673 if (len_so_far >= MAX_ASCII_LEN)
10675 fputs ("\"\n\t.ascii\t\"", stream);
10676 len_so_far = 0;
10679 if (ISPRINT (c))
10681 if (c == '\\' || c == '\"')
10683 putc ('\\', stream);
10684 len_so_far++;
10686 putc (c, stream);
10687 len_so_far++;
10689 else
10691 fprintf (stream, "\\%03o", c);
10692 len_so_far += 4;
10696 fputs ("\"\n", stream);
10699 /* Compute the register save mask for registers 0 through 12
10700 inclusive. This code is used by arm_compute_save_reg_mask. */
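/* For illustration: in the normal (non-interrupt) case, a function that
   uses only the call-saved registers r4 and r7 gets a mask of
   (1 << 4) | (1 << 7) == 0x90, with the frame pointer and PIC base
   bits added when those are needed.  */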
10702 static unsigned long
10703 arm_compute_save_reg0_reg12_mask (void)
10705 unsigned long func_type = arm_current_func_type ();
10706 unsigned long save_reg_mask = 0;
10707 unsigned int reg;
10709 if (IS_INTERRUPT (func_type))
10711 unsigned int max_reg;
10712 /* Interrupt functions must not corrupt any registers,
10713 even call clobbered ones. If this is a leaf function
10714 we can just examine the registers used by the RTL, but
10715 otherwise we have to assume that whatever function is
10716 called might clobber anything, and so we have to save
10717 all the call-clobbered registers as well. */
10718 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10719 /* FIQ handlers have registers r8 - r12 banked, so
10720 we only need to check r0 - r7. Normal ISRs only
10721 bank r14 and r15, so we must check up to r12.
10722 r13 is the stack pointer which is always preserved,
10723 so we do not need to consider it here. */
10724 max_reg = 7;
10725 else
10726 max_reg = 12;
10728 for (reg = 0; reg <= max_reg; reg++)
10729 if (df_regs_ever_live_p (reg)
10730 || (! current_function_is_leaf && call_used_regs[reg]))
10731 save_reg_mask |= (1 << reg);
10733 /* Also save the pic base register if necessary. */
10734 if (flag_pic
10735 && !TARGET_SINGLE_PIC_BASE
10736 && arm_pic_register != INVALID_REGNUM
10737 && crtl->uses_pic_offset_table)
10738 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10740 else
10742 /* In the normal case we only need to save those registers
10743 which are call saved and which are used by this function. */
10744 for (reg = 0; reg <= 11; reg++)
10745 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10746 save_reg_mask |= (1 << reg);
10748 /* Handle the frame pointer as a special case. */
10749 if (frame_pointer_needed)
10750 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10752 /* If we aren't loading the PIC register,
10753 don't stack it even though it may be live. */
10754 if (flag_pic
10755 && !TARGET_SINGLE_PIC_BASE
10756 && arm_pic_register != INVALID_REGNUM
10757 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10758 || crtl->uses_pic_offset_table))
10759 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10761 /* The prologue will copy SP into R0, so save it. */
10762 if (IS_STACKALIGN (func_type))
10763 save_reg_mask |= 1;
10766 /* Save registers so the exception handler can modify them. */
10767 if (crtl->calls_eh_return)
10769 unsigned int i;
10771 for (i = 0; ; i++)
10773 reg = EH_RETURN_DATA_REGNO (i);
10774 if (reg == INVALID_REGNUM)
10775 break;
10776 save_reg_mask |= 1 << reg;
10780 return save_reg_mask;
10784 /* Compute a bit mask of which registers need to be
10785 saved on the stack for the current function.
10786 This is used by arm_get_frame_offsets, which may add extra registers. */
10788 static unsigned long
10789 arm_compute_save_reg_mask (void)
10791 unsigned int save_reg_mask = 0;
10792 unsigned long func_type = arm_current_func_type ();
10793 unsigned int reg;
10795 if (IS_NAKED (func_type))
10796 /* This should never really happen. */
10797 return 0;
10799 /* If we are creating a stack frame, then we must save the frame pointer,
10800 IP (which will hold the old stack pointer), LR and the PC. */
10801 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
10802 save_reg_mask |=
10803 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10804 | (1 << IP_REGNUM)
10805 | (1 << LR_REGNUM)
10806 | (1 << PC_REGNUM);
10808 /* Volatile functions do not return, so there
10809 is no need to save any other registers. */
10810 if (IS_VOLATILE (func_type))
10811 return save_reg_mask;
10813 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10815 /* Decide if we need to save the link register.
10816 Interrupt routines have their own banked link register,
10817 so they never need to save it.
10818 Otherwise if we do not use the link register we do not need to save
10819 it. If we are pushing other registers onto the stack however, we
10820 can save an instruction in the epilogue by pushing the link register
10821 now and then popping it back into the PC. This incurs extra memory
10822 accesses though, so we only do it when optimizing for size, and only
10823 if we know that we will not need a fancy return sequence. */
10824 if (df_regs_ever_live_p (LR_REGNUM)
10825 || (save_reg_mask
10826 && optimize_size
10827 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10828 && !crtl->calls_eh_return))
10829 save_reg_mask |= 1 << LR_REGNUM;
10831 if (cfun->machine->lr_save_eliminated)
10832 save_reg_mask &= ~ (1 << LR_REGNUM);
10834 if (TARGET_REALLY_IWMMXT
10835 && ((bit_count (save_reg_mask)
10836 + ARM_NUM_INTS (crtl->args.pretend_args_size)) % 2) != 0)
10838 /* The total number of registers that are going to be pushed
10839 onto the stack is odd. We need to ensure that the stack
10840 is 64-bit aligned before we start to save iWMMXt registers,
10841 and also before we start to create locals. (A local variable
10842 might be a double or long long which we will load/store using
10843 an iWMMXt instruction). Therefore we need to push another
10844 ARM register, so that the stack will be 64-bit aligned. We
10845 try to avoid using the arg registers (r0 - r3) as they might be
10846 used to pass values in a tail call. */
10847 for (reg = 4; reg <= 12; reg++)
10848 if ((save_reg_mask & (1 << reg)) == 0)
10849 break;
10851 if (reg <= 12)
10852 save_reg_mask |= (1 << reg);
10853 else
10855 cfun->machine->sibcall_blocked = 1;
10856 save_reg_mask |= (1 << 3);
10860 /* We may need to push an additional register for use initializing the
10861 PIC base register. */
10862 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10863 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10865 reg = thumb_find_work_register (1 << 4);
10866 if (!call_used_regs[reg])
10867 save_reg_mask |= (1 << reg);
10870 return save_reg_mask;
10874 /* Compute a bit mask of which registers need to be
10875 saved on the stack for the current function. */
10876 static unsigned long
10877 thumb1_compute_save_reg_mask (void)
10879 unsigned long mask;
10880 unsigned reg;
10882 mask = 0;
10883 for (reg = 0; reg < 12; reg ++)
10884 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10885 mask |= 1 << reg;
10887 if (flag_pic
10888 && !TARGET_SINGLE_PIC_BASE
10889 && arm_pic_register != INVALID_REGNUM
10890 && crtl->uses_pic_offset_table)
10891 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10893 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10894 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10895 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10897 /* LR will also be pushed if any lo regs are pushed. */
10898 if (mask & 0xff || thumb_force_lr_save ())
10899 mask |= (1 << LR_REGNUM);
10901 /* Make sure we have a low work register if we need one.
10902 We will need one if we are going to push a high register,
10903 but we are not currently intending to push a low register. */
10904 if ((mask & 0xff) == 0
10905 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10907 /* Use thumb_find_work_register to choose which register
10908 we will use. If the register is live then we will
10909 have to push it. Use LAST_LO_REGNUM as our fallback
10910 choice for the register to select. */
10911 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10912 /* Make sure the register returned by thumb_find_work_register is
10913 not part of the return value. */
10914 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
10915 reg = LAST_LO_REGNUM;
10917 if (! call_used_regs[reg])
10918 mask |= 1 << reg;
10921 return mask;
10925 /* Return the number of bytes required to save VFP registers. */
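/* For illustration: a function that must save d8 and d9 needs 16 bytes,
   but on a pre-ARMv6 core the count of 2 is bumped to 3 (24 bytes) by
   the ARM10 VFPr1 workaround below.  */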
10926 static int
10927 arm_get_vfp_saved_size (void)
10929 unsigned int regno;
10930 int count;
10931 int saved;
10933 saved = 0;
10934 /* Space for saved VFP registers. */
10935 if (TARGET_HARD_FLOAT && TARGET_VFP)
10937 count = 0;
10938 for (regno = FIRST_VFP_REGNUM;
10939 regno < LAST_VFP_REGNUM;
10940 regno += 2)
10942 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10943 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10945 if (count > 0)
10947 /* Workaround ARM10 VFPr1 bug. */
10948 if (count == 2 && !arm_arch6)
10949 count++;
10950 saved += count * 8;
10952 count = 0;
10954 else
10955 count++;
10957 if (count > 0)
10959 if (count == 2 && !arm_arch6)
10960 count++;
10961 saved += count * 8;
10964 return saved;
10968 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10969 everything bar the final return instruction. */
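/* For illustration (arbitrary example registers): a normal ARM-mode
   function whose prologue pushed {r4, r5, lr} returns through the LDM
   path below as roughly
       ldmfd sp!, {r4, r5, pc}
   while a function that saved nothing falls through to the final
   switch and returns with "bx lr" (or "mov pc, lr" on cores without
   BX).  */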
10970 const char *
10971 output_return_instruction (rtx operand, int really_return, int reverse)
10973 char conditional[10];
10974 char instr[100];
10975 unsigned reg;
10976 unsigned long live_regs_mask;
10977 unsigned long func_type;
10978 arm_stack_offsets *offsets;
10980 func_type = arm_current_func_type ();
10982 if (IS_NAKED (func_type))
10983 return "";
10985 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10987 /* If this function was declared non-returning, and we have
10988 found a tail call, then we have to trust that the called
10989 function won't return. */
10990 if (really_return)
10992 rtx ops[2];
10994 /* Otherwise, trap an attempted return by aborting. */
10995 ops[0] = operand;
10996 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
10997 : "abort");
10998 assemble_external_libcall (ops[1]);
10999 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
11002 return "";
11005 gcc_assert (!cfun->calls_alloca || really_return);
11007 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
11009 return_used_this_function = 1;
11011 offsets = arm_get_frame_offsets ();
11012 live_regs_mask = offsets->saved_regs_mask;
11014 if (live_regs_mask)
11016 const char * return_reg;
11018 /* If we do not have any special requirements for function exit
11019 (e.g. interworking) then we can load the return address
11020 directly into the PC. Otherwise we must load it into LR. */
11021 if (really_return
11022 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
11023 return_reg = reg_names[PC_REGNUM];
11024 else
11025 return_reg = reg_names[LR_REGNUM];
11027 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
11029 /* There are three possible reasons for the IP register
11030 being saved. 1) a stack frame was created, in which case
11031 IP contains the old stack pointer, or 2) an ISR routine
11032 corrupted it, or 3) it was saved to align the stack on
11033 iWMMXt. In case 1, restore IP into SP, otherwise just
11034 restore IP. */
11035 if (frame_pointer_needed)
11037 live_regs_mask &= ~ (1 << IP_REGNUM);
11038 live_regs_mask |= (1 << SP_REGNUM);
11040 else
11041 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11044 /* On some ARM architectures it is faster to use LDR rather than
11045 LDM to load a single register. On other architectures, the
11046 cost is the same. In 26 bit mode, or for exception handlers,
11047 we have to use LDM to load the PC so that the CPSR is also
11048 restored. */
11049 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11050 if (live_regs_mask == (1U << reg))
11051 break;
11053 if (reg <= LAST_ARM_REGNUM
11054 && (reg != LR_REGNUM
11055 || ! really_return
11056 || ! IS_INTERRUPT (func_type)))
11058 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11059 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11061 else
11063 char *p;
11064 int first = 1;
11066 /* Generate the load multiple instruction to restore the
11067 registers. Note we can get here, even if
11068 frame_pointer_needed is true, but only if sp already
11069 points to the base of the saved core registers. */
11070 if (live_regs_mask & (1 << SP_REGNUM))
11072 unsigned HOST_WIDE_INT stack_adjust;
11074 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11075 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11077 if (stack_adjust && arm_arch5 && TARGET_ARM)
11078 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11079 else
11081 /* If we can't use ldmib (SA110 bug),
11082 then try to pop r3 instead. */
11083 if (stack_adjust)
11084 live_regs_mask |= 1 << 3;
11085 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11088 else
11089 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11091 p = instr + strlen (instr);
11093 for (reg = 0; reg <= SP_REGNUM; reg++)
11094 if (live_regs_mask & (1 << reg))
11096 int l = strlen (reg_names[reg]);
11098 if (first)
11099 first = 0;
11100 else
11102 memcpy (p, ", ", 2);
11103 p += 2;
11106 memcpy (p, "%|", 2);
11107 memcpy (p + 2, reg_names[reg], l);
11108 p += l + 2;
11111 if (live_regs_mask & (1 << LR_REGNUM))
11113 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11114 /* If returning from an interrupt, restore the CPSR. */
11115 if (IS_INTERRUPT (func_type))
11116 strcat (p, "^");
11118 else
11119 strcpy (p, "}");
11122 output_asm_insn (instr, & operand);
11124 /* See if we need to generate an extra instruction to
11125 perform the actual function return. */
11126 if (really_return
11127 && func_type != ARM_FT_INTERWORKED
11128 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11130 /* The return has already been handled
11131 by loading the LR into the PC. */
11132 really_return = 0;
11136 if (really_return)
11138 switch ((int) ARM_FUNC_TYPE (func_type))
11140 case ARM_FT_ISR:
11141 case ARM_FT_FIQ:
11142 /* ??? This is wrong for unified assembly syntax. */
11143 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11144 break;
11146 case ARM_FT_INTERWORKED:
11147 sprintf (instr, "bx%s\t%%|lr", conditional);
11148 break;
11150 case ARM_FT_EXCEPTION:
11151 /* ??? This is wrong for unified assembly syntax. */
11152 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11153 break;
11155 default:
11156 /* Use bx if it's available. */
11157 if (arm_arch5 || arm_arch4t)
11158 sprintf (instr, "bx%s\t%%|lr", conditional);
11159 else
11160 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11161 break;
11164 output_asm_insn (instr, & operand);
11167 return "";
11170 /* Write the function name into the code section, directly preceding
11171 the function prologue.
11173 Code will be output similar to this:
11174 t0
11175 .ascii "arm_poke_function_name", 0
11176 .align
11177 t1
11178 .word 0xff000000 + (t1 - t0)
11179 arm_poke_function_name
11180 mov ip, sp
11181 stmfd sp!, {fp, ip, lr, pc}
11182 sub fp, ip, #4
11184 When performing a stack backtrace, code can inspect the value
11185 of 'pc' stored at 'fp' + 0. If the trace function then looks
11186 at location pc - 12 and the top 8 bits are set, then we know
11187 that there is a function name embedded immediately preceding this
11188 location, and that its length is ((pc[-3]) & ~0xff000000).
11190 We assume that pc is declared as a pointer to an unsigned long.
11192 It is of no benefit to output the function name if we are assembling
11193 a leaf function. These function types will not contain a stack
11194 backtrace structure, therefore it is not possible to determine the
11195 function name. */
11196 void
11197 arm_poke_function_name (FILE *stream, const char *name)
11199 unsigned long alignlength;
11200 unsigned long length;
11201 rtx x;
11203 length = strlen (name) + 1;
11204 alignlength = ROUND_UP_WORD (length);
11206 ASM_OUTPUT_ASCII (stream, name, length);
11207 ASM_OUTPUT_ALIGN (stream, 2);
11208 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11209 assemble_aligned_integer (UNITS_PER_WORD, x);
11212 /* Place some comments into the assembler stream
11213 describing the current function. */
11214 static void
11215 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11217 unsigned long func_type;
11219 if (TARGET_THUMB1)
11221 thumb1_output_function_prologue (f, frame_size);
11222 return;
11225 /* Sanity check. */
11226 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11228 func_type = arm_current_func_type ();
11230 switch ((int) ARM_FUNC_TYPE (func_type))
11232 default:
11233 case ARM_FT_NORMAL:
11234 break;
11235 case ARM_FT_INTERWORKED:
11236 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11237 break;
11238 case ARM_FT_ISR:
11239 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11240 break;
11241 case ARM_FT_FIQ:
11242 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11243 break;
11244 case ARM_FT_EXCEPTION:
11245 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11246 break;
11249 if (IS_NAKED (func_type))
11250 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11252 if (IS_VOLATILE (func_type))
11253 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11255 if (IS_NESTED (func_type))
11256 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11257 if (IS_STACKALIGN (func_type))
11258 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11260 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11261 crtl->args.size,
11262 crtl->args.pretend_args_size, frame_size);
11264 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11265 frame_pointer_needed,
11266 cfun->machine->uses_anonymous_args);
11268 if (cfun->machine->lr_save_eliminated)
11269 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11271 if (crtl->calls_eh_return)
11272 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11274 return_used_this_function = 0;
11277 const char *
11278 arm_output_epilogue (rtx sibling)
11280 int reg;
11281 unsigned long saved_regs_mask;
11282 unsigned long func_type;
11283 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11284 frame that is $fp + 4 for a non-variadic function. */
11285 int floats_offset = 0;
11286 rtx operands[3];
11287 FILE * f = asm_out_file;
11288 unsigned int lrm_count = 0;
11289 int really_return = (sibling == NULL);
11290 int start_reg;
11291 arm_stack_offsets *offsets;
11293 /* If we have already generated the return instruction
11294 then it is futile to generate anything else. */
11295 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11296 return "";
11298 func_type = arm_current_func_type ();
11300 if (IS_NAKED (func_type))
11301 /* Naked functions don't have epilogues. */
11302 return "";
11304 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11306 rtx op;
11308 /* A volatile function should never return. Call abort. */
11309 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11310 assemble_external_libcall (op);
11311 output_asm_insn ("bl\t%a0", &op);
11313 return "";
11316 /* If we are throwing an exception, then we really must be doing a
11317 return, so we can't tail-call. */
11318 gcc_assert (!crtl->calls_eh_return || really_return);
11320 offsets = arm_get_frame_offsets ();
11321 saved_regs_mask = offsets->saved_regs_mask;
11323 if (TARGET_IWMMXT)
11324 lrm_count = bit_count (saved_regs_mask);
11326 floats_offset = offsets->saved_args;
11327 /* Compute how far away the floats will be. */
11328 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11329 if (saved_regs_mask & (1 << reg))
11330 floats_offset += 4;
11332 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
11334 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11335 int vfp_offset = offsets->frame;
11337 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11339 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11340 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11342 floats_offset += 12;
11343 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11344 reg, FP_REGNUM, floats_offset - vfp_offset);
11347 else
11349 start_reg = LAST_FPA_REGNUM;
11351 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11353 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11355 floats_offset += 12;
11357 /* We can't unstack more than four registers at once. */
11358 if (start_reg - reg == 3)
11360 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11361 reg, FP_REGNUM, floats_offset - vfp_offset);
11362 start_reg = reg - 1;
11365 else
11367 if (reg != start_reg)
11368 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11369 reg + 1, start_reg - reg,
11370 FP_REGNUM, floats_offset - vfp_offset);
11371 start_reg = reg - 1;
11375 /* Just in case the last register checked also needs unstacking. */
11376 if (reg != start_reg)
11377 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11378 reg + 1, start_reg - reg,
11379 FP_REGNUM, floats_offset - vfp_offset);
11382 if (TARGET_HARD_FLOAT && TARGET_VFP)
11384 int saved_size;
11386 /* The fldmd insns do not have base+offset addressing
11387 modes, so we use IP to hold the address. */
11388 saved_size = arm_get_vfp_saved_size ();
11390 if (saved_size > 0)
11392 floats_offset += saved_size;
11393 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11394 FP_REGNUM, floats_offset - vfp_offset);
11396 start_reg = FIRST_VFP_REGNUM;
11397 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11399 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11400 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11402 if (start_reg != reg)
11403 vfp_output_fldmd (f, IP_REGNUM,
11404 (start_reg - FIRST_VFP_REGNUM) / 2,
11405 (reg - start_reg) / 2);
11406 start_reg = reg + 2;
11409 if (start_reg != reg)
11410 vfp_output_fldmd (f, IP_REGNUM,
11411 (start_reg - FIRST_VFP_REGNUM) / 2,
11412 (reg - start_reg) / 2);
11415 if (TARGET_IWMMXT)
11417 /* The frame pointer is guaranteed to be non-double-word aligned.
11418 This is because it is set to (old_stack_pointer - 4) and the
11419 old_stack_pointer was double word aligned. Thus the offset to
11420 the iWMMXt registers to be loaded must also be non-double-word
11421 sized, so that the resultant address *is* double-word aligned.
11422 We can ignore floats_offset since that was already included in
11423 the live_regs_mask. */
11424 lrm_count += (lrm_count % 2 ? 2 : 1);
11426 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11427 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11429 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11430 reg, FP_REGNUM, lrm_count * 4);
11431 lrm_count += 2;
11435 /* saved_regs_mask should contain the IP, which at the time of stack
11436 frame generation actually contains the old stack pointer. So a
11437 quick way to unwind the stack is just pop the IP register directly
11438 into the stack pointer. */
11439 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11440 saved_regs_mask &= ~ (1 << IP_REGNUM);
11441 saved_regs_mask |= (1 << SP_REGNUM);
11443 /* There are two registers left in saved_regs_mask - LR and PC. We
11444 only need to restore the LR register (the return address), but to
11445 save time we can load it directly into the PC, unless we need a
11446 special function exit sequence, or we are not really returning. */
11447 if (really_return
11448 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11449 && !crtl->calls_eh_return)
11450 /* Delete the LR from the register mask, so that the LR on
11451 the stack is loaded into the PC in the register mask. */
11452 saved_regs_mask &= ~ (1 << LR_REGNUM);
11453 else
11454 saved_regs_mask &= ~ (1 << PC_REGNUM);
11456 /* We must use SP as the base register, because SP is one of the
11457 registers being restored. If an interrupt or page fault
11458 happens in the ldm instruction, the SP might or might not
11459 have been restored. That would be bad, as then SP will no
11460 longer indicate the safe area of stack, and we can get stack
11461 corruption. Using SP as the base register means that it will
11462 be reset correctly to the original value, should an interrupt
11463 occur. If the stack pointer already points at the right
11464 place, then omit the subtraction. */
11465 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11466 || cfun->calls_alloca)
11467 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11468 4 * bit_count (saved_regs_mask));
11469 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11471 if (IS_INTERRUPT (func_type))
11472 /* Interrupt handlers will have pushed the
11473 IP onto the stack, so restore it now. */
11474 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11476 else
11478 /* This branch is executed for ARM mode (non-apcs frames) and
11479 Thumb-2 mode. Frame layout is essentially the same for those
11480 cases, except that in ARM mode frame pointer points to the
11481 first saved register, while in Thumb-2 mode the frame pointer points
11482 to the last saved register.
11484 It is possible to make frame pointer point to last saved
11485 register in both cases, and remove some conditionals below.
11486 That means that fp setup in prologue would be just "mov fp, sp"
11487 and sp restore in epilogue would be just "mov sp, fp", whereas
11488 now we have to use add/sub in those cases. However, the value
11489 of that would be marginal, as both mov and add/sub are 32-bit
11490 in ARM mode, and it would require extra conditionals
11491 in arm_expand_prologue to distinguish the ARM-apcs-frame case
11492 (where frame pointer is required to point at first register)
11493 and ARM-non-apcs-frame. Therefore, such a change is postponed
11494 until a real need arises. */
11495 HOST_WIDE_INT amount;
11496 int rfe;
11497 /* Restore stack pointer if necessary. */
11498 if (TARGET_ARM && frame_pointer_needed)
11500 operands[0] = stack_pointer_rtx;
11501 operands[1] = hard_frame_pointer_rtx;
11503 operands[2] = GEN_INT (offsets->frame - offsets->saved_regs);
11504 output_add_immediate (operands);
11506 else
11508 if (frame_pointer_needed)
11510 /* For Thumb-2 restore sp from the frame pointer.
11511 Operand restrictions mean we have to increment FP, then copy
11512 to SP. */
11513 amount = offsets->locals_base - offsets->saved_regs;
11514 operands[0] = hard_frame_pointer_rtx;
11516 else
11518 unsigned long count;
11519 operands[0] = stack_pointer_rtx;
11520 amount = offsets->outgoing_args - offsets->saved_regs;
11521 /* pop call clobbered registers if it avoids a
11522 separate stack adjustment. */
11523 count = offsets->saved_regs - offsets->saved_args;
11524 if (optimize_size
11525 && count != 0
11526 && !crtl->calls_eh_return
11527 && bit_count(saved_regs_mask) * 4 == count
11528 && !IS_INTERRUPT (func_type)
11529 && !crtl->tail_call_emit)
11531 unsigned long mask;
11532 mask = (1 << (arm_size_return_regs() / 4)) - 1;
11533 mask ^= 0xf;
11534 mask &= ~saved_regs_mask;
11535 reg = 0;
11536 while (bit_count (mask) * 4 > amount)
11538 while ((mask & (1 << reg)) == 0)
11539 reg++;
11540 mask &= ~(1 << reg);
11542 if (bit_count (mask) * 4 == amount) {
11543 amount = 0;
11544 saved_regs_mask |= mask;
11549 if (amount)
11551 operands[1] = operands[0];
11552 operands[2] = GEN_INT (amount);
11553 output_add_immediate (operands);
11555 if (frame_pointer_needed)
11556 asm_fprintf (f, "\tmov\t%r, %r\n",
11557 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11560 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11562 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11563 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11564 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11565 reg, SP_REGNUM);
11567 else
11569 start_reg = FIRST_FPA_REGNUM;
11571 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11573 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11575 if (reg - start_reg == 3)
11577 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11578 start_reg, SP_REGNUM);
11579 start_reg = reg + 1;
11582 else
11584 if (reg != start_reg)
11585 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11586 start_reg, reg - start_reg,
11587 SP_REGNUM);
11589 start_reg = reg + 1;
11593 /* Just in case the last register checked also needs unstacking. */
11594 if (reg != start_reg)
11595 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11596 start_reg, reg - start_reg, SP_REGNUM);
11599 if (TARGET_HARD_FLOAT && TARGET_VFP)
11601 start_reg = FIRST_VFP_REGNUM;
11602 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11604 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11605 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11607 if (start_reg != reg)
11608 vfp_output_fldmd (f, SP_REGNUM,
11609 (start_reg - FIRST_VFP_REGNUM) / 2,
11610 (reg - start_reg) / 2);
11611 start_reg = reg + 2;
11614 if (start_reg != reg)
11615 vfp_output_fldmd (f, SP_REGNUM,
11616 (start_reg - FIRST_VFP_REGNUM) / 2,
11617 (reg - start_reg) / 2);
11619 if (TARGET_IWMMXT)
11620 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11621 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11622 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11624 /* If we can, restore the LR into the PC. */
11625 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11626 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11627 && !IS_STACKALIGN (func_type)
11628 && really_return
11629 && crtl->args.pretend_args_size == 0
11630 && saved_regs_mask & (1 << LR_REGNUM)
11631 && !crtl->calls_eh_return)
11633 saved_regs_mask &= ~ (1 << LR_REGNUM);
11634 saved_regs_mask |= (1 << PC_REGNUM);
11635 rfe = IS_INTERRUPT (func_type);
11637 else
11638 rfe = 0;
11640 /* Load the registers off the stack. If we only have one register
11641 to load use the LDR instruction - it is faster. For Thumb-2
11642 always use pop and the assembler will pick the best instruction. */
11643 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11644 && !IS_INTERRUPT(func_type))
11646 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11648 else if (saved_regs_mask)
11650 if (saved_regs_mask & (1 << SP_REGNUM))
11651 /* Note - write back to the stack register is not enabled
11652 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11653 in the list of registers and if we add writeback the
11654 instruction becomes UNPREDICTABLE. */
11655 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11656 rfe);
11657 else if (TARGET_ARM)
11658 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11659 rfe);
11660 else
11661 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11664 if (crtl->args.pretend_args_size)
11666 /* Unwind the pre-pushed regs. */
11667 operands[0] = operands[1] = stack_pointer_rtx;
11668 operands[2] = GEN_INT (crtl->args.pretend_args_size);
11669 output_add_immediate (operands);
11673 /* We may have already restored PC directly from the stack. */
11674 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11675 return "";
11677 /* Stack adjustment for exception handler. */
11678 if (crtl->calls_eh_return)
11679 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11680 ARM_EH_STACKADJ_REGNUM);
11682 /* Generate the return instruction. */
11683 switch ((int) ARM_FUNC_TYPE (func_type))
11685 case ARM_FT_ISR:
11686 case ARM_FT_FIQ:
11687 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11688 break;
11690 case ARM_FT_EXCEPTION:
11691 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11692 break;
11694 case ARM_FT_INTERWORKED:
11695 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11696 break;
11698 default:
11699 if (IS_STACKALIGN (func_type))
11701 /* See comment in arm_expand_prologue. */
11702 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11704 if (arm_arch5 || arm_arch4t)
11705 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11706 else
11707 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11708 break;
11711 return "";
11714 static void
11715 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11716 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11718 arm_stack_offsets *offsets;
11720 if (TARGET_THUMB1)
11722 int regno;
11724 /* Emit any call-via-reg trampolines that are needed for v4t support
11725 of call_reg and call_value_reg type insns. */
11726 for (regno = 0; regno < LR_REGNUM; regno++)
11728 rtx label = cfun->machine->call_via[regno];
11730 if (label != NULL)
11732 switch_to_section (function_section (current_function_decl));
11733 targetm.asm_out.internal_label (asm_out_file, "L",
11734 CODE_LABEL_NUMBER (label));
11735 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11739 /* ??? Probably not safe to set this here, since it assumes that a
11740 function will be emitted as assembly immediately after we generate
11741 RTL for it. This does not happen for inline functions. */
11742 return_used_this_function = 0;
11744 else /* TARGET_32BIT */
11746 /* We need to take into account any stack-frame rounding. */
11747 offsets = arm_get_frame_offsets ();
11749 gcc_assert (!use_return_insn (FALSE, NULL)
11750 || !return_used_this_function
11751 || offsets->saved_regs == offsets->outgoing_args
11752 || frame_pointer_needed);
11754 /* Reset the ARM-specific per-function variables. */
11755 after_arm_reorg = 0;
11759 /* Generate and emit an insn that we will recognize as a push_multi.
11760 Unfortunately, since this insn does not reflect very well the actual
11761 semantics of the operation, we need to annotate the insn for the benefit
11762 of DWARF2 frame unwind information. */
11763 static rtx
11764 emit_multi_reg_push (unsigned long mask)
11766 int num_regs = 0;
11767 int num_dwarf_regs;
11768 int i, j;
11769 rtx par;
11770 rtx dwarf;
11771 int dwarf_par_index;
11772 rtx tmp, reg;
11774 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11775 if (mask & (1 << i))
11776 num_regs++;
11778 gcc_assert (num_regs && num_regs <= 16);
11780 /* We don't record the PC in the dwarf frame information. */
11781 num_dwarf_regs = num_regs;
11782 if (mask & (1 << PC_REGNUM))
11783 num_dwarf_regs--;
11785 /* For the body of the insn we are going to generate an UNSPEC in
11786 parallel with several USEs. This allows the insn to be recognized
11787 by the push_multi pattern in the arm.md file. The insn looks
11788 something like this:
11790 (parallel [
11791 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11792 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11793 (use (reg:SI 11 fp))
11794 (use (reg:SI 12 ip))
11795 (use (reg:SI 14 lr))
11796 (use (reg:SI 15 pc))
11799 For the frame note however, we try to be more explicit and actually
11800 show each register being stored into the stack frame, plus a (single)
11801 decrement of the stack pointer. We do it this way in order to be
11802 friendly to the stack unwinding code, which only wants to see a single
11803 stack decrement per instruction. The RTL we generate for the note looks
11804 something like this:
11806 (sequence [
11807 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11808 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11809 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11810 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11811 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11814 This sequence is used both by the code to support stack unwinding for
11815 exception handlers and the code to generate dwarf2 frame debugging. */
11817 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11818 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11819 dwarf_par_index = 1;
11821 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11823 if (mask & (1 << i))
11825 reg = gen_rtx_REG (SImode, i);
11827 XVECEXP (par, 0, 0)
11828 = gen_rtx_SET (VOIDmode,
11829 gen_frame_mem (BLKmode,
11830 gen_rtx_PRE_DEC (BLKmode,
11831 stack_pointer_rtx)),
11832 gen_rtx_UNSPEC (BLKmode,
11833 gen_rtvec (1, reg),
11834 UNSPEC_PUSH_MULT));
11836 if (i != PC_REGNUM)
11838 tmp = gen_rtx_SET (VOIDmode,
11839 gen_frame_mem (SImode, stack_pointer_rtx),
11840 reg);
11841 RTX_FRAME_RELATED_P (tmp) = 1;
11842 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11843 dwarf_par_index++;
11846 break;
11850 for (j = 1, i++; j < num_regs; i++)
11852 if (mask & (1 << i))
11854 reg = gen_rtx_REG (SImode, i);
11856 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11858 if (i != PC_REGNUM)
11860 tmp
11861 = gen_rtx_SET (VOIDmode,
11862 gen_frame_mem (SImode,
11863 plus_constant (stack_pointer_rtx,
11864 4 * j)),
11865 reg);
11866 RTX_FRAME_RELATED_P (tmp) = 1;
11867 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11870 j++;
11874 par = emit_insn (par);
11876 tmp = gen_rtx_SET (VOIDmode,
11877 stack_pointer_rtx,
11878 plus_constant (stack_pointer_rtx, -4 * num_regs));
11879 RTX_FRAME_RELATED_P (tmp) = 1;
11880 XVECEXP (dwarf, 0, 0) = tmp;
11882 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11883 REG_NOTES (par));
11884 return par;
11887 /* Calculate the size of the return value that is passed in registers. */
11888 static unsigned
11889 arm_size_return_regs (void)
11891 enum machine_mode mode;
11893 if (crtl->return_rtx != 0)
11894 mode = GET_MODE (crtl->return_rtx);
11895 else
11896 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11898 return GET_MODE_SIZE (mode);
11901 static rtx
11902 emit_sfm (int base_reg, int count)
11904 rtx par;
11905 rtx dwarf;
11906 rtx tmp, reg;
11907 int i;
11909 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11910 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11912 reg = gen_rtx_REG (XFmode, base_reg++);
11914 XVECEXP (par, 0, 0)
11915 = gen_rtx_SET (VOIDmode,
11916 gen_frame_mem (BLKmode,
11917 gen_rtx_PRE_DEC (BLKmode,
11918 stack_pointer_rtx)),
11919 gen_rtx_UNSPEC (BLKmode,
11920 gen_rtvec (1, reg),
11921 UNSPEC_PUSH_MULT));
11922 tmp = gen_rtx_SET (VOIDmode,
11923 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11924 RTX_FRAME_RELATED_P (tmp) = 1;
11925 XVECEXP (dwarf, 0, 1) = tmp;
11927 for (i = 1; i < count; i++)
11929 reg = gen_rtx_REG (XFmode, base_reg++);
11930 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11932 tmp = gen_rtx_SET (VOIDmode,
11933 gen_frame_mem (XFmode,
11934 plus_constant (stack_pointer_rtx,
11935 i * 12)),
11936 reg);
11937 RTX_FRAME_RELATED_P (tmp) = 1;
11938 XVECEXP (dwarf, 0, i + 1) = tmp;
11941 tmp = gen_rtx_SET (VOIDmode,
11942 stack_pointer_rtx,
11943 plus_constant (stack_pointer_rtx, -12 * count));
11945 RTX_FRAME_RELATED_P (tmp) = 1;
11946 XVECEXP (dwarf, 0, 0) = tmp;
11948 par = emit_insn (par);
11949 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11950 REG_NOTES (par));
11951 return par;
11955 /* Return true if the current function needs to save/restore LR. */
11957 static bool
11958 thumb_force_lr_save (void)
11960 return !cfun->machine->lr_save_eliminated
11961 && (!leaf_function_p ()
11962 || thumb_far_jump_used_p ()
11963 || df_regs_ever_live_p (LR_REGNUM));
11967 /* Compute the distance from register FROM to register TO.
11968 These can be the arg pointer (26), the soft frame pointer (25),
11969 the stack pointer (13) or the hard frame pointer (11).
11970 In thumb mode r7 is used as the soft frame pointer, if needed.
11971 Typical stack layout looks like this:
11973 old stack pointer -> | |
11974 ----
11975 | | \
11976 | | saved arguments for
11977 | | vararg functions
11978 | | /
11980 hard FP & arg pointer -> | | \
11981 | | stack
11982 | | frame
11983 | | /
11985 | | \
11986 | | call saved
11987 | | registers
11988 soft frame pointer -> | | /
11990 | | \
11991 | | local
11992 | | variables
11993 locals base pointer -> | | /
11995 | | \
11996 | | outgoing
11997 | | arguments
11998 current stack pointer -> | | /
12001 For a given function some or all of these stack components
12002 may not be needed, giving rise to the possibility of
12003 eliminating some of the registers.
12005 The values returned by this function must reflect the behavior
12006 of arm_expand_prologue() and arm_compute_save_reg_mask().
12008 The sign of the number returned reflects the direction of stack
12009 growth, so the values are positive for all eliminations except
12010 from the soft frame pointer to the hard frame pointer.
12012 SFP may point just inside the local variables block to ensure correct
12013 alignment. */
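/* As a worked example of that convention (assuming no pretend arguments,
   no caller-interworking slot, four core registers saved, 8 bytes of
   locals and no outgoing arguments): arm_get_frame_offsets below gives
   saved_args = 0, saved_regs = 16, soft_frame = 16, locals_base = 24 and
   outgoing_args = 24, so eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM yields 24 - (0 + 4) = 20, eliminating it to
   FRAME_POINTER_REGNUM yields 16, and eliminating FRAME_POINTER_REGNUM
   to STACK_POINTER_REGNUM yields 8.  */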
12016 /* Calculate stack offsets. These are used to calculate register elimination
12017 offsets and in prologue/epilogue code. Also calculates which registers
12018 should be saved. */
12020 static arm_stack_offsets *
12021 arm_get_frame_offsets (void)
12023 struct arm_stack_offsets *offsets;
12024 unsigned long func_type;
12025 int leaf;
12026 int saved;
12027 int core_saved;
12028 HOST_WIDE_INT frame_size;
12029 int i;
12031 offsets = &cfun->machine->stack_offsets;
12033 /* We need to know if we are a leaf function. Unfortunately, it
12034 is possible to be called after start_sequence has been called,
12035 which causes get_insns to return the insns for the sequence,
12036 not the function, which will cause leaf_function_p to return
12037 the incorrect result.  However, we only need
12039 to know about leaf functions once reload has completed, and the
12040 frame size cannot be changed after that time, so we can safely
12041 use the cached value. */
12043 if (reload_completed)
12044 return offsets;
12046 /* Initially this is the size of the local variables.  It will be translated
12047 into an offset once we have determined the size of preceding data. */
12048 frame_size = ROUND_UP_WORD (get_frame_size ());
12050 leaf = leaf_function_p ();
12052 /* Space for variadic functions. */
12053 offsets->saved_args = crtl->args.pretend_args_size;
12055 /* In Thumb mode this is incorrect, but never used. */
12056 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
12058 if (TARGET_32BIT)
12060 unsigned int regno;
12062 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
12063 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12064 saved = core_saved;
12066 /* We know that SP will be doubleword aligned on entry, and we must
12067 preserve that condition at any subroutine call. We also require the
12068 soft frame pointer to be doubleword aligned. */
12070 if (TARGET_REALLY_IWMMXT)
12072 /* Check for the call-saved iWMMXt registers. */
12073 for (regno = FIRST_IWMMXT_REGNUM;
12074 regno <= LAST_IWMMXT_REGNUM;
12075 regno++)
12076 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12077 saved += 8;
12080 func_type = arm_current_func_type ();
12081 if (! IS_VOLATILE (func_type))
12083 /* Space for saved FPA registers. */
12084 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
12085 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
12086 saved += 12;
12088 /* Space for saved VFP registers. */
12089 if (TARGET_HARD_FLOAT && TARGET_VFP)
12090 saved += arm_get_vfp_saved_size ();
12093 else /* TARGET_THUMB1 */
12095 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
12096 core_saved = bit_count (offsets->saved_regs_mask) * 4;
12097 saved = core_saved;
12098 if (TARGET_BACKTRACE)
12099 saved += 16;
12102 /* Saved registers include the stack frame. */
12103 offsets->saved_regs = offsets->saved_args + saved;
12104 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12105 /* A leaf function does not need any stack alignment if it has nothing
12106 on the stack. */
12107 if (leaf && frame_size == 0)
12109 offsets->outgoing_args = offsets->soft_frame;
12110 offsets->locals_base = offsets->soft_frame;
12111 return offsets;
12114 /* Ensure SFP has the correct alignment. */
12115 if (ARM_DOUBLEWORD_ALIGN
12116 && (offsets->soft_frame & 7))
12118 offsets->soft_frame += 4;
12119 /* Try to align stack by pushing an extra reg. Don't bother doing this
12120 when there is a stack frame as the alignment will be rolled into
12121 the normal stack adjustment. */
12122 if (frame_size + crtl->outgoing_args_size == 0)
12124 int reg = -1;
12126 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
12128 if ((offsets->saved_regs_mask & (1 << i)) == 0)
12130 reg = i;
12131 break;
12135 if (reg == -1 && arm_size_return_regs () <= 12
12136 && !crtl->tail_call_emit)
12138 /* Push/pop an argument register (r3) if all callee saved
12139 registers are already being pushed. */
12140 reg = 3;
12143 if (reg != -1)
12145 offsets->saved_regs += 4;
12146 offsets->saved_regs_mask |= (1 << reg);
12151 offsets->locals_base = offsets->soft_frame + frame_size;
12152 offsets->outgoing_args = (offsets->locals_base
12153 + crtl->outgoing_args_size);
12155 if (ARM_DOUBLEWORD_ALIGN)
12157 /* Ensure SP remains doubleword aligned. */
12158 if (offsets->outgoing_args & 7)
12159 offsets->outgoing_args += 4;
12160 gcc_assert (!(offsets->outgoing_args & 7));
12163 return offsets;
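/* For example, when ARM_DOUBLEWORD_ALIGN is in effect and an odd number
   of words has been saved so far, a function with no locals and no
   outgoing arguments gets one extra register added to saved_regs_mask
   above: an otherwise unused register in the range r4-r11 (r4-r7 for
   Thumb-1) if one is free, failing that r3 when the return value leaves
   it free.  This keeps SP doubleword aligned without a separate stack
   adjustment.  */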
12167 /* Calculate the relative offsets for the different stack pointers. Positive
12168 offsets are in the direction of stack growth. */
12170 HOST_WIDE_INT
12171 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12173 arm_stack_offsets *offsets;
12175 offsets = arm_get_frame_offsets ();
12177 /* OK, now we have enough information to compute the distances.
12178 There must be an entry in these switch tables for each pair
12179 of registers in ELIMINABLE_REGS, even if some of the entries
12180 seem to be redundant or useless. */
12181 switch (from)
12183 case ARG_POINTER_REGNUM:
12184 switch (to)
12186 case THUMB_HARD_FRAME_POINTER_REGNUM:
12187 return 0;
12189 case FRAME_POINTER_REGNUM:
12190 /* This is the reverse of the soft frame pointer
12191 to hard frame pointer elimination below. */
12192 return offsets->soft_frame - offsets->saved_args;
12194 case ARM_HARD_FRAME_POINTER_REGNUM:
12195 /* If there is no stack frame then the hard
12196 frame pointer and the arg pointer coincide. */
12197 if (offsets->frame == offsets->saved_regs)
12198 return 0;
12199 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12200 return (frame_pointer_needed
12201 && cfun->static_chain_decl != NULL
12202 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12204 case STACK_POINTER_REGNUM:
12205 /* If nothing has been pushed on the stack at all
12206 then this will return -4. This *is* correct! */
12207 return offsets->outgoing_args - (offsets->saved_args + 4);
12209 default:
12210 gcc_unreachable ();
12212 gcc_unreachable ();
12214 case FRAME_POINTER_REGNUM:
12215 switch (to)
12217 case THUMB_HARD_FRAME_POINTER_REGNUM:
12218 return 0;
12220 case ARM_HARD_FRAME_POINTER_REGNUM:
12221 /* The hard frame pointer points to the top entry in the
12222 stack frame. The soft frame pointer to the bottom entry
12223 in the stack frame. If there is no stack frame at all,
12224 then they are identical. */
12226 return offsets->frame - offsets->soft_frame;
12228 case STACK_POINTER_REGNUM:
12229 return offsets->outgoing_args - offsets->soft_frame;
12231 default:
12232 gcc_unreachable ();
12234 gcc_unreachable ();
12236 default:
12237 /* You cannot eliminate from the stack pointer.
12238 In theory you could eliminate from the hard frame
12239 pointer to the stack pointer, but this will never
12240 happen, since if a stack frame is not needed the
12241 hard frame pointer will never be used. */
12242 gcc_unreachable ();
12247 /* Emit RTL to save coprocessor registers on function entry. Returns the
12248 number of bytes pushed. */
12250 static int
12251 arm_save_coproc_regs(void)
12253 int saved_size = 0;
12254 unsigned reg;
12255 unsigned start_reg;
12256 rtx insn;
12258 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12259 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12261 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12262 insn = gen_rtx_MEM (V2SImode, insn);
12263 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12264 RTX_FRAME_RELATED_P (insn) = 1;
12265 saved_size += 8;
12268 /* Save any floating point call-saved registers used by this
12269 function. */
12270 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12272 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12273 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12275 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12276 insn = gen_rtx_MEM (XFmode, insn);
12277 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12278 RTX_FRAME_RELATED_P (insn) = 1;
12279 saved_size += 12;
12282 else
12284 start_reg = LAST_FPA_REGNUM;
12286 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12288 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12290 if (start_reg - reg == 3)
12292 insn = emit_sfm (reg, 4);
12293 RTX_FRAME_RELATED_P (insn) = 1;
12294 saved_size += 48;
12295 start_reg = reg - 1;
12298 else
12300 if (start_reg != reg)
12302 insn = emit_sfm (reg + 1, start_reg - reg);
12303 RTX_FRAME_RELATED_P (insn) = 1;
12304 saved_size += (start_reg - reg) * 12;
12306 start_reg = reg - 1;
12310 if (start_reg != reg)
12312 insn = emit_sfm (reg + 1, start_reg - reg);
12313 saved_size += (start_reg - reg) * 12;
12314 RTX_FRAME_RELATED_P (insn) = 1;
12317 if (TARGET_HARD_FLOAT && TARGET_VFP)
12319 start_reg = FIRST_VFP_REGNUM;
12321 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12323 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12324 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12326 if (start_reg != reg)
12327 saved_size += vfp_emit_fstmd (start_reg,
12328 (reg - start_reg) / 2);
12329 start_reg = reg + 2;
12332 if (start_reg != reg)
12333 saved_size += vfp_emit_fstmd (start_reg,
12334 (reg - start_reg) / 2);
12336 return saved_size;
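/* In the non-FPA_EMU2 case above, for example, if f4-f7 are all live
   across a call they are saved with a single four-register sfm, whereas
   a lone live register such as f5 gets its own one-register store.  */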
12340 /* Set the Thumb frame pointer from the stack pointer. */
12342 static void
12343 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12345 HOST_WIDE_INT amount;
12346 rtx insn, dwarf;
12348 amount = offsets->outgoing_args - offsets->locals_base;
12349 if (amount < 1024)
12350 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12351 stack_pointer_rtx, GEN_INT (amount)));
12352 else
12354 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12355 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
12356 expects the first two operands to be the same. */
12357 if (TARGET_THUMB2)
12359 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12360 stack_pointer_rtx,
12361 hard_frame_pointer_rtx));
12363 else
12365 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12366 hard_frame_pointer_rtx,
12367 stack_pointer_rtx));
12369 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12370 plus_constant (stack_pointer_rtx, amount));
12371 RTX_FRAME_RELATED_P (dwarf) = 1;
12372 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12373 REG_NOTES (insn));
12376 RTX_FRAME_RELATED_P (insn) = 1;
12379 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12380 function. */
12381 void
12382 arm_expand_prologue (void)
12384 rtx amount;
12385 rtx insn;
12386 rtx ip_rtx;
12387 unsigned long live_regs_mask;
12388 unsigned long func_type;
12389 int fp_offset = 0;
12390 int saved_pretend_args = 0;
12391 int saved_regs = 0;
12392 unsigned HOST_WIDE_INT args_to_push;
12393 arm_stack_offsets *offsets;
12395 func_type = arm_current_func_type ();
12397 /* Naked functions don't have prologues. */
12398 if (IS_NAKED (func_type))
12399 return;
12401 /* Make a copy of crtl->args.pretend_args_size, as we may need to modify it locally. */
12402 args_to_push = crtl->args.pretend_args_size;
12404 /* Compute which register we will have to save onto the stack. */
12405 offsets = arm_get_frame_offsets ();
12406 live_regs_mask = offsets->saved_regs_mask;
12408 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12410 if (IS_STACKALIGN (func_type))
12412 rtx dwarf;
12413 rtx r0;
12414 rtx r1;
12415 /* Handle a word-aligned stack pointer. We generate the following:
12417 mov r0, sp
12418 bic r1, r0, #7
12419 mov sp, r1
12420 <save and restore r0 in normal prologue/epilogue>
12421 mov sp, r0
12422 bx lr
12424 The unwinder doesn't need to know about the stack realignment.
12425 Just tell it we saved SP in r0. */
12426 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12428 r0 = gen_rtx_REG (SImode, 0);
12429 r1 = gen_rtx_REG (SImode, 1);
12430 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12431 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12432 insn = gen_movsi (r0, stack_pointer_rtx);
12433 RTX_FRAME_RELATED_P (insn) = 1;
12434 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12435 dwarf, REG_NOTES (insn));
12436 emit_insn (insn);
12437 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12438 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12441 /* For APCS frames, if the IP register is clobbered
12442 when creating the frame, save that register in a special
12443 way. */
12444 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
12446 if (IS_INTERRUPT (func_type))
12448 /* Interrupt functions must not corrupt any registers.
12449 Creating a frame pointer however, corrupts the IP
12450 register, so we must push it first. */
12451 insn = emit_multi_reg_push (1 << IP_REGNUM);
12453 /* Do not set RTX_FRAME_RELATED_P on this insn.
12454 The dwarf stack unwinding code only wants to see one
12455 stack decrement per function, and this is not it. If
12456 this instruction is labeled as being part of the frame
12457 creation sequence then dwarf2out_frame_debug_expr will
12458 die when it encounters the assignment of IP to FP
12459 later on, since the use of SP here establishes SP as
12460 the CFA register and not IP.
12462 Anyway this instruction is not really part of the stack
12463 frame creation although it is part of the prologue. */
12465 else if (IS_NESTED (func_type))
12467 /* The static chain register is the same as the IP register,
12468 which is used as a scratch register during stack frame creation.
12469 To get around this we need to find somewhere to store IP
12470 whilst the frame is being created.  We try the following
12471 places in order:
12473 1. The last argument register.
12474 2. A slot on the stack above the frame. (This only
12475 works if the function is not a varargs function).
12476 3. Register r3, after pushing the argument registers
12477 onto the stack.
12479 Note - we only need to tell the dwarf2 backend about the SP
12480 adjustment in the second variant; the static chain register
12481 doesn't need to be unwound, as it doesn't contain a value
12482 inherited from the caller. */
12484 if (df_regs_ever_live_p (3) == false)
12485 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12486 else if (args_to_push == 0)
12488 rtx dwarf;
12490 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12491 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12492 fp_offset = 4;
12494 /* Just tell the dwarf backend that we adjusted SP. */
12495 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12496 plus_constant (stack_pointer_rtx,
12497 -fp_offset));
12498 RTX_FRAME_RELATED_P (insn) = 1;
12499 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12500 dwarf, REG_NOTES (insn));
12502 else
12504 /* Store the args on the stack. */
12505 if (cfun->machine->uses_anonymous_args)
12506 insn = emit_multi_reg_push
12507 ((0xf0 >> (args_to_push / 4)) & 0xf);
12508 else
12509 insn = emit_insn
12510 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12511 GEN_INT (- args_to_push)));
12513 RTX_FRAME_RELATED_P (insn) = 1;
12515 saved_pretend_args = 1;
12516 fp_offset = args_to_push;
12517 args_to_push = 0;
12519 /* Now reuse r3 to preserve IP. */
12520 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12524 insn = emit_set_insn (ip_rtx,
12525 plus_constant (stack_pointer_rtx, fp_offset));
12526 RTX_FRAME_RELATED_P (insn) = 1;
12529 if (args_to_push)
12531 /* Push the argument registers, or reserve space for them. */
12532 if (cfun->machine->uses_anonymous_args)
12533 insn = emit_multi_reg_push
12534 ((0xf0 >> (args_to_push / 4)) & 0xf);
12535 else
12536 insn = emit_insn
12537 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12538 GEN_INT (- args_to_push)));
12539 RTX_FRAME_RELATED_P (insn) = 1;
12542 /* If this is an interrupt service routine, and the link register
12543 is going to be pushed, and we're not generating an extra
12544 push of IP (needed when a frame is created and the frame layout is APCS),
12545 then subtracting four from LR now will mean that the function return
12546 can be done with a single instruction. */
12547 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12548 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12549 && !(frame_pointer_needed && TARGET_APCS_FRAME)
12550 && TARGET_ARM)
12552 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12554 emit_set_insn (lr, plus_constant (lr, -4));
12557 if (live_regs_mask)
12559 saved_regs += bit_count (live_regs_mask) * 4;
12560 if (optimize_size && !frame_pointer_needed
12561 && saved_regs == offsets->saved_regs - offsets->saved_args)
12563 /* If no coprocessor registers are being pushed and we don't have
12564 to worry about a frame pointer then push extra registers to
12565 create the stack frame.  This is done in a way that does not
12566 alter the frame layout, so is independent of the epilogue. */
12567 int n;
12568 int frame;
12569 n = 0;
12570 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
12571 n++;
12572 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
12573 if (frame && n * 4 >= frame)
12575 n = frame / 4;
12576 live_regs_mask |= (1 << n) - 1;
12577 saved_regs += frame;
12580 insn = emit_multi_reg_push (live_regs_mask);
12581 RTX_FRAME_RELATED_P (insn) = 1;
12584 if (! IS_VOLATILE (func_type))
12585 saved_regs += arm_save_coproc_regs ();
12587 if (frame_pointer_needed && TARGET_ARM)
12589 /* Create the new frame pointer. */
12590 if (TARGET_APCS_FRAME)
12592 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12593 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12594 RTX_FRAME_RELATED_P (insn) = 1;
12596 if (IS_NESTED (func_type))
12598 /* Recover the static chain register. */
12599 if (!df_regs_ever_live_p (3)
12600 || saved_pretend_args)
12601 insn = gen_rtx_REG (SImode, 3);
12602 else /* if (crtl->args.pretend_args_size == 0) */
12604 insn = plus_constant (hard_frame_pointer_rtx, 4);
12605 insn = gen_frame_mem (SImode, insn);
12607 emit_set_insn (ip_rtx, insn);
12608 /* Add a USE to stop propagate_one_insn() from barfing. */
12609 emit_insn (gen_prologue_use (ip_rtx));
12612 else
12614 insn = GEN_INT (saved_regs - 4);
12615 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12616 stack_pointer_rtx, insn));
12617 RTX_FRAME_RELATED_P (insn) = 1;
12621 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12623 /* This add can produce multiple insns for a large constant, so we
12624 need to get tricky. */
12625 rtx last = get_last_insn ();
12627 amount = GEN_INT (offsets->saved_args + saved_regs
12628 - offsets->outgoing_args);
12630 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12631 amount));
12632 do
12634 last = last ? NEXT_INSN (last) : get_insns ();
12635 RTX_FRAME_RELATED_P (last) = 1;
12637 while (last != insn);
12639 /* If the frame pointer is needed, emit a special barrier that
12640 will prevent the scheduler from moving stores to the frame
12641 before the stack adjustment. */
12642 if (frame_pointer_needed)
12643 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12644 hard_frame_pointer_rtx));
12648 if (frame_pointer_needed && TARGET_THUMB2)
12649 thumb_set_frame_pointer (offsets);
12651 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12653 unsigned long mask;
12655 mask = live_regs_mask;
12656 mask &= THUMB2_WORK_REGS;
12657 if (!IS_NESTED (func_type))
12658 mask |= (1 << IP_REGNUM);
12659 arm_load_pic_register (mask);
12662 /* If we are profiling, make sure no instructions are scheduled before
12663 the call to mcount. Similarly if the user has requested no
12664 scheduling in the prolog. Similarly if we want non-call exceptions
12665 using the EABI unwinder, to prevent faulting instructions from being
12666 swapped with a stack adjustment. */
12667 if (crtl->profile || !TARGET_SCHED_PROLOG
12668 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12669 emit_insn (gen_blockage ());
12671 /* If the link register is being kept alive, with the return address in it,
12672 then make sure that it does not get reused by the ce2 pass. */
12673 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12674 cfun->machine->lr_save_eliminated = 1;
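/* As an illustration, for a simple APCS-frame function the code above
   typically produces a prologue along the lines of:

	mov	ip, sp
	stmfd	sp!, {r4, fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #16		@ locals and outgoing arguments

   Interrupt, nested (static chain) and stack-realigning functions add
   the extra IP/r0 handling shown above.  */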
12677 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12678 static void
12679 arm_print_condition (FILE *stream)
12681 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12683 /* Branch conversion is not implemented for Thumb-2. */
12684 if (TARGET_THUMB)
12686 output_operand_lossage ("predicated Thumb instruction");
12687 return;
12689 if (current_insn_predicate != NULL)
12691 output_operand_lossage
12692 ("predicated instruction in conditional sequence");
12693 return;
12696 fputs (arm_condition_codes[arm_current_cc], stream);
12698 else if (current_insn_predicate)
12700 enum arm_cond_code code;
12702 if (TARGET_THUMB1)
12704 output_operand_lossage ("predicated Thumb instruction");
12705 return;
12708 code = get_arm_condition_code (current_insn_predicate);
12709 fputs (arm_condition_codes[code], stream);
12714 /* If CODE is 'd', then the X is a condition operand and the instruction
12715 should only be executed if the condition is true.
12716 if CODE is 'D', then the X is a condition operand and the instruction
12717 should only be executed if the condition is false: however, if the mode
12718 of the comparison is CCFPEmode, then always execute the instruction -- we
12719 do this because in these circumstances !GE does not necessarily imply LT;
12720 in these cases the instruction pattern will take care to make sure that
12721 an instruction containing %d will follow, thereby undoing the effects of
12722 doing this instruction unconditionally.
12723 If CODE is 'N' then X is a floating point operand that must be negated
12724 before output.
12725 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12726 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12727 void
12728 arm_print_operand (FILE *stream, rtx x, int code)
12730 switch (code)
12732 case '@':
12733 fputs (ASM_COMMENT_START, stream);
12734 return;
12736 case '_':
12737 fputs (user_label_prefix, stream);
12738 return;
12740 case '|':
12741 fputs (REGISTER_PREFIX, stream);
12742 return;
12744 case '?':
12745 arm_print_condition (stream);
12746 return;
12748 case '(':
12749 /* Nothing in unified syntax, otherwise the current condition code. */
12750 if (!TARGET_UNIFIED_ASM)
12751 arm_print_condition (stream);
12752 break;
12754 case ')':
12755 /* The current condition code in unified syntax, otherwise nothing. */
12756 if (TARGET_UNIFIED_ASM)
12757 arm_print_condition (stream);
12758 break;
12760 case '.':
12761 /* The current condition code for a condition code setting instruction.
12762 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12763 if (TARGET_UNIFIED_ASM)
12765 fputc('s', stream);
12766 arm_print_condition (stream);
12768 else
12770 arm_print_condition (stream);
12771 fputc('s', stream);
12773 return;
12775 case '!':
12776 /* If the instruction is conditionally executed then print
12777 the current condition code, otherwise print 's'. */
12778 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12779 if (current_insn_predicate)
12780 arm_print_condition (stream);
12781 else
12782 fputc('s', stream);
12783 break;
12785 /* %# is a "break" sequence. It doesn't output anything, but is used to
12786 separate e.g. operand numbers from following text, if that text consists
12787 of further digits which we don't want to be part of the operand
12788 number. */
12789 case '#':
12790 return;
12792 case 'N':
12794 REAL_VALUE_TYPE r;
12795 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12796 r = REAL_VALUE_NEGATE (r);
12797 fprintf (stream, "%s", fp_const_from_val (&r));
12799 return;
12801 /* An integer without a preceding # sign. */
12802 case 'c':
12803 gcc_assert (GET_CODE (x) == CONST_INT);
12804 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12805 return;
12807 case 'B':
12808 if (GET_CODE (x) == CONST_INT)
12810 HOST_WIDE_INT val;
12811 val = ARM_SIGN_EXTEND (~INTVAL (x));
12812 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12814 else
12816 putc ('~', stream);
12817 output_addr_const (stream, x);
12819 return;
12821 case 'L':
12822 /* The low 16 bits of an immediate constant. */
12823 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12824 return;
12826 case 'i':
12827 fprintf (stream, "%s", arithmetic_instr (x, 1));
12828 return;
12830 /* Truncate Cirrus shift counts. */
12831 case 's':
12832 if (GET_CODE (x) == CONST_INT)
12834 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12835 return;
12837 arm_print_operand (stream, x, 0);
12838 return;
12840 case 'I':
12841 fprintf (stream, "%s", arithmetic_instr (x, 0));
12842 return;
12844 case 'S':
12846 HOST_WIDE_INT val;
12847 const char *shift;
12849 if (!shift_operator (x, SImode))
12851 output_operand_lossage ("invalid shift operand");
12852 break;
12855 shift = shift_op (x, &val);
12857 if (shift)
12859 fprintf (stream, ", %s ", shift);
12860 if (val == -1)
12861 arm_print_operand (stream, XEXP (x, 1), 0);
12862 else
12863 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12866 return;
12868 /* An explanation of the 'Q', 'R' and 'H' register operands:
12870 In a pair of registers containing a DI or DF value the 'Q'
12871 operand returns the register number of the register containing
12872 the least significant part of the value. The 'R' operand returns
12873 the register number of the register containing the most
12874 significant part of the value.
12876 The 'H' operand returns the higher of the two register numbers.
12877 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12878 same as the 'Q' operand, since the most significant part of the
12879 value is held in the lower number register. The reverse is true
12880 on systems where WORDS_BIG_ENDIAN is false.
12882 The purpose of these operands is to distinguish between cases
12883 where the endian-ness of the values is important (for example
12884 when they are added together), and cases where the endian-ness
12885 is irrelevant, but the order of register operations is important.
12886 For example when loading a value from memory into a register
12887 pair, the endian-ness does not matter. Provided that the value
12888 from the lower memory address is put into the lower numbered
12889 register, and the value from the higher address is put into the
12890 higher numbered register, the load will work regardless of whether
12891 the value being loaded is big-wordian or little-wordian. The
12892 order of the two register loads can matter however, if the address
12893 of the memory location is actually held in one of the registers
12894 being overwritten by the load. */
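/* For example, if a DImode value lives in the register pair {r4, r5},
   then on a little-endian target %Q prints r4, %R prints r5 and %H
   prints r5; when WORDS_BIG_ENDIAN is true, %Q prints r5, %R prints r4
   and %H still prints r5.  */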
12895 case 'Q':
12896 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12898 output_operand_lossage ("invalid operand for code '%c'", code);
12899 return;
12902 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12903 return;
12905 case 'R':
12906 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12908 output_operand_lossage ("invalid operand for code '%c'", code);
12909 return;
12912 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12913 return;
12915 case 'H':
12916 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12918 output_operand_lossage ("invalid operand for code '%c'", code);
12919 return;
12922 asm_fprintf (stream, "%r", REGNO (x) + 1);
12923 return;
12925 case 'J':
12926 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12928 output_operand_lossage ("invalid operand for code '%c'", code);
12929 return;
12932 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12933 return;
12935 case 'K':
12936 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12938 output_operand_lossage ("invalid operand for code '%c'", code);
12939 return;
12942 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12943 return;
12945 case 'm':
12946 asm_fprintf (stream, "%r",
12947 GET_CODE (XEXP (x, 0)) == REG
12948 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12949 return;
12951 case 'M':
12952 asm_fprintf (stream, "{%r-%r}",
12953 REGNO (x),
12954 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12955 return;
12957 /* Like 'M', but writing doubleword vector registers, for use by Neon
12958 insns. */
12959 case 'h':
12961 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12962 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12963 if (numregs == 1)
12964 asm_fprintf (stream, "{d%d}", regno);
12965 else
12966 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12968 return;
12970 case 'd':
12971 /* CONST_TRUE_RTX means always -- that's the default. */
12972 if (x == const_true_rtx)
12973 return;
12975 if (!COMPARISON_P (x))
12977 output_operand_lossage ("invalid operand for code '%c'", code);
12978 return;
12981 fputs (arm_condition_codes[get_arm_condition_code (x)],
12982 stream);
12983 return;
12985 case 'D':
12986 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12987 want to do that. */
12988 if (x == const_true_rtx)
12990 output_operand_lossage ("instruction never executed");
12991 return;
12993 if (!COMPARISON_P (x))
12995 output_operand_lossage ("invalid operand for code '%c'", code);
12996 return;
12999 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
13000 (get_arm_condition_code (x))],
13001 stream);
13002 return;
13004 /* Cirrus registers can be accessed in a variety of ways:
13005 single floating point (f)
13006 double floating point (d)
13007 32bit integer (fx)
13008 64bit integer (dx). */
13009 case 'W': /* Cirrus register in F mode. */
13010 case 'X': /* Cirrus register in D mode. */
13011 case 'Y': /* Cirrus register in FX mode. */
13012 case 'Z': /* Cirrus register in DX mode. */
13013 gcc_assert (GET_CODE (x) == REG
13014 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
13016 fprintf (stream, "mv%s%s",
13017 code == 'W' ? "f"
13018 : code == 'X' ? "d"
13019 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
13021 return;
13023 /* Print cirrus register in the mode specified by the register's mode. */
13024 case 'V':
13026 int mode = GET_MODE (x);
13028 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
13030 output_operand_lossage ("invalid operand for code '%c'", code);
13031 return;
13034 fprintf (stream, "mv%s%s",
13035 mode == DFmode ? "d"
13036 : mode == SImode ? "fx"
13037 : mode == DImode ? "dx"
13038 : "f", reg_names[REGNO (x)] + 2);
13040 return;
13043 case 'U':
13044 if (GET_CODE (x) != REG
13045 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
13046 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
13047 /* Bad value for wCG register number. */
13049 output_operand_lossage ("invalid operand for code '%c'", code);
13050 return;
13053 else
13054 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
13055 return;
13057 /* Print an iWMMXt control register name. */
13058 case 'w':
13059 if (GET_CODE (x) != CONST_INT
13060 || INTVAL (x) < 0
13061 || INTVAL (x) >= 16)
13062 /* Bad value for wC register number. */
13064 output_operand_lossage ("invalid operand for code '%c'", code);
13065 return;
13068 else
13070 static const char * wc_reg_names [16] =
13072 "wCID", "wCon", "wCSSF", "wCASF",
13073 "wC4", "wC5", "wC6", "wC7",
13074 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
13075 "wC12", "wC13", "wC14", "wC15"
13078 fprintf (stream, wc_reg_names [INTVAL (x)]);
13080 return;
13082 /* Print a VFP/Neon double precision or quad precision register name. */
13083 case 'P':
13084 case 'q':
13086 int mode = GET_MODE (x);
13087 int is_quad = (code == 'q');
13088 int regno;
13090 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
13092 output_operand_lossage ("invalid operand for code '%c'", code);
13093 return;
13096 if (GET_CODE (x) != REG
13097 || !IS_VFP_REGNUM (REGNO (x)))
13099 output_operand_lossage ("invalid operand for code '%c'", code);
13100 return;
13103 regno = REGNO (x);
13104 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
13105 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
13107 output_operand_lossage ("invalid operand for code '%c'", code);
13108 return;
13111 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
13112 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
13114 return;
13116 /* These two codes print the low/high doubleword register of a Neon quad
13117 register, respectively. For pair-structure types, can also print
13118 low/high quadword registers. */
13119 case 'e':
13120 case 'f':
13122 int mode = GET_MODE (x);
13123 int regno;
13125 if ((GET_MODE_SIZE (mode) != 16
13126 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
13128 output_operand_lossage ("invalid operand for code '%c'", code);
13129 return;
13132 regno = REGNO (x);
13133 if (!NEON_REGNO_OK_FOR_QUAD (regno))
13135 output_operand_lossage ("invalid operand for code '%c'", code);
13136 return;
13139 if (GET_MODE_SIZE (mode) == 16)
13140 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
13141 + (code == 'f' ? 1 : 0));
13142 else
13143 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
13144 + (code == 'f' ? 1 : 0));
13146 return;
13148 /* Print a VFPv3 floating-point constant, represented as an integer
13149 index. */
13150 case 'G':
13152 int index = vfp3_const_double_index (x);
13153 gcc_assert (index != -1);
13154 fprintf (stream, "%d", index);
13156 return;
13158 /* Print bits representing opcode features for Neon.
13160 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
13161 and polynomials as unsigned.
13163 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13165 Bit 2 is 1 for rounding functions, 0 otherwise. */
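/* For example, an operand value of 1 (a signed integer) makes the 'T'
   case below print 's', 'F' print 'i' and 't' print 's'; a value of 7
   (float with rounding) makes 'T', 'F' and 't' all print 'f' and 'O'
   print "r".  */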
13167 /* Identify the type as 's', 'u', 'p' or 'f'. */
13168 case 'T':
13170 HOST_WIDE_INT bits = INTVAL (x);
13171 fputc ("uspf"[bits & 3], stream);
13173 return;
13175 /* Likewise, but signed and unsigned integers are both 'i'. */
13176 case 'F':
13178 HOST_WIDE_INT bits = INTVAL (x);
13179 fputc ("iipf"[bits & 3], stream);
13181 return;
13183 /* As for 'T', but emit 'u' instead of 'p'. */
13184 case 't':
13186 HOST_WIDE_INT bits = INTVAL (x);
13187 fputc ("usuf"[bits & 3], stream);
13189 return;
13191 /* Bit 2: rounding (vs none). */
13192 case 'O':
13194 HOST_WIDE_INT bits = INTVAL (x);
13195 fputs ((bits & 4) != 0 ? "r" : "", stream);
13197 return;
13199 default:
13200 if (x == 0)
13202 output_operand_lossage ("missing operand");
13203 return;
13206 switch (GET_CODE (x))
13208 case REG:
13209 asm_fprintf (stream, "%r", REGNO (x));
13210 break;
13212 case MEM:
13213 output_memory_reference_mode = GET_MODE (x);
13214 output_address (XEXP (x, 0));
13215 break;
13217 case CONST_DOUBLE:
13218 if (TARGET_NEON)
13220 char fpstr[20];
13221 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13222 sizeof (fpstr), 0, 1);
13223 fprintf (stream, "#%s", fpstr);
13225 else
13226 fprintf (stream, "#%s", fp_immediate_constant (x));
13227 break;
13229 default:
13230 gcc_assert (GET_CODE (x) != NEG);
13231 fputc ('#', stream);
13232 output_addr_const (stream, x);
13233 break;
13238 /* Target hook for assembling integer objects. The ARM version needs to
13239 handle word-sized values specially. */
13240 static bool
13241 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13243 enum machine_mode mode;
13245 if (size == UNITS_PER_WORD && aligned_p)
13247 fputs ("\t.word\t", asm_out_file);
13248 output_addr_const (asm_out_file, x);
13250 /* Mark symbols as position independent. We only do this in the
13251 .text segment, not in the .data segment. */
13252 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13253 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13255 /* See legitimize_pic_address for an explanation of the
13256 TARGET_VXWORKS_RTP check. */
13257 if (TARGET_VXWORKS_RTP
13258 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13259 fputs ("(GOT)", asm_out_file);
13260 else
13261 fputs ("(GOTOFF)", asm_out_file);
13263 fputc ('\n', asm_out_file);
13264 return true;
13267 mode = GET_MODE (x);
13269 if (arm_vector_mode_supported_p (mode))
13271 int i, units;
13272 unsigned int invmask = 0, parts_per_word;
13274 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13276 units = CONST_VECTOR_NUNITS (x);
13277 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13279 /* For big-endian Neon vectors, we must permute the vector to the form
13280 which, when loaded by a VLDR or VLDM instruction, will give a vector
13281 with the elements in the right order. */
13282 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13284 parts_per_word = UNITS_PER_WORD / size;
13285 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13286 support those anywhere yet. */
13287 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
13290 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13291 for (i = 0; i < units; i++)
13293 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13294 assemble_integer
13295 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13297 else
13298 for (i = 0; i < units; i++)
13300 rtx elt = CONST_VECTOR_ELT (x, i);
13301 REAL_VALUE_TYPE rval;
13303 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13305 assemble_real
13306 (rval, GET_MODE_INNER (mode),
13307 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13310 return true;
13313 return default_assemble_integer (x, size, aligned_p);
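/* For instance, when a PIC constant pool entry references a non-local
   symbol the code above emits "\t.word\tsym(GOT)", while a local symbol
   or label becomes "\t.word\tsym(GOTOFF)" (always "(GOT)" for VxWorks
   RTP).  */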
13316 static void
13317 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13319 section *s;
13321 if (!TARGET_AAPCS_BASED)
13323 (is_ctor ?
13324 default_named_section_asm_out_constructor
13325 : default_named_section_asm_out_destructor) (symbol, priority);
13326 return;
13329 /* Put these in the .init_array section, using a special relocation. */
13330 if (priority != DEFAULT_INIT_PRIORITY)
13332 char buf[18];
13333 sprintf (buf, "%s.%.5u",
13334 is_ctor ? ".init_array" : ".fini_array",
13335 priority);
13336 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13338 else if (is_ctor)
13339 s = ctors_section;
13340 else
13341 s = dtors_section;
13343 switch_to_section (s);
13344 assemble_align (POINTER_SIZE);
13345 fputs ("\t.word\t", asm_out_file);
13346 output_addr_const (asm_out_file, symbol);
13347 fputs ("(target1)\n", asm_out_file);
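/* For example, on an AAPCS target a constructor registered with
   priority 100 is placed in a section named ".init_array.00100" and
   emitted as a "\t.word\tsymbol(target1)" entry.  */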
13350 /* Add a function to the list of static constructors. */
13352 static void
13353 arm_elf_asm_constructor (rtx symbol, int priority)
13355 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13358 /* Add a function to the list of static destructors. */
13360 static void
13361 arm_elf_asm_destructor (rtx symbol, int priority)
13363 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13366 /* A finite state machine takes care of noticing whether or not instructions
13367 can be conditionally executed, and thus decrease execution time and code
13368 size by deleting branch instructions. The fsm is controlled by
13369 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13371 /* The state of the fsm controlling condition codes are:
13372 0: normal, do nothing special
13373 1: make ASM_OUTPUT_OPCODE not output this instruction
13374 2: make ASM_OUTPUT_OPCODE not output this instruction
13375 3: make instructions conditional
13376 4: make instructions conditional
13378 State transitions (state->state by whom under condition):
13379 0 -> 1 final_prescan_insn if the `target' is a label
13380 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13381 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13382 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13383 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13384 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13385 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13386 (the target insn is arm_target_insn).
13388 If the jump clobbers the conditions then we use states 2 and 4.
13390 A similar thing can be done with conditional return insns.
13392 XXX In case the `target' is an unconditional branch, this conditionalising
13393 of the instructions always reduces code size, but not always execution
13394 time. But then, I want to reduce the code size to somewhere near what
13395 /bin/cc produces. */
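/* For example, a sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be rewritten by this machinery as

	cmp	r0, #0
	addne	r1, r1, #1

   removing the branch entirely.  */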
13397 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13398 instructions. When a COND_EXEC instruction is seen the subsequent
13399 instructions are scanned so that multiple conditional instructions can be
13400 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13401 specify the length and true/false mask for the IT block. These will be
13402 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
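/* For example, with arm_condexec_masklen == 3 and arm_condexec_mask ==
   0x3 (the first two instructions use the block's condition, the third
   its inverse), thumb2_asm_output_opcode emits "itte <cond>" before the
   first instruction of the block.  */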
13404 /* Returns the index of the ARM condition code string in
13405 `arm_condition_codes'. COMPARISON should be an rtx like
13406 `(eq (...) (...))'. */
13407 static enum arm_cond_code
13408 get_arm_condition_code (rtx comparison)
13410 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13411 int code;
13412 enum rtx_code comp_code = GET_CODE (comparison);
13414 if (GET_MODE_CLASS (mode) != MODE_CC)
13415 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13416 XEXP (comparison, 1));
13418 switch (mode)
13420 case CC_DNEmode: code = ARM_NE; goto dominance;
13421 case CC_DEQmode: code = ARM_EQ; goto dominance;
13422 case CC_DGEmode: code = ARM_GE; goto dominance;
13423 case CC_DGTmode: code = ARM_GT; goto dominance;
13424 case CC_DLEmode: code = ARM_LE; goto dominance;
13425 case CC_DLTmode: code = ARM_LT; goto dominance;
13426 case CC_DGEUmode: code = ARM_CS; goto dominance;
13427 case CC_DGTUmode: code = ARM_HI; goto dominance;
13428 case CC_DLEUmode: code = ARM_LS; goto dominance;
13429 case CC_DLTUmode: code = ARM_CC;
13431 dominance:
13432 gcc_assert (comp_code == EQ || comp_code == NE);
13434 if (comp_code == EQ)
13435 return ARM_INVERSE_CONDITION_CODE (code);
13436 return code;
13438 case CC_NOOVmode:
13439 switch (comp_code)
13441 case NE: return ARM_NE;
13442 case EQ: return ARM_EQ;
13443 case GE: return ARM_PL;
13444 case LT: return ARM_MI;
13445 default: gcc_unreachable ();
13448 case CC_Zmode:
13449 switch (comp_code)
13451 case NE: return ARM_NE;
13452 case EQ: return ARM_EQ;
13453 default: gcc_unreachable ();
13456 case CC_Nmode:
13457 switch (comp_code)
13459 case NE: return ARM_MI;
13460 case EQ: return ARM_PL;
13461 default: gcc_unreachable ();
13464 case CCFPEmode:
13465 case CCFPmode:
13466 /* These encodings assume that AC=1 in the FPA system control
13467 byte. This allows us to handle all cases except UNEQ and
13468 LTGT. */
13469 switch (comp_code)
13471 case GE: return ARM_GE;
13472 case GT: return ARM_GT;
13473 case LE: return ARM_LS;
13474 case LT: return ARM_MI;
13475 case NE: return ARM_NE;
13476 case EQ: return ARM_EQ;
13477 case ORDERED: return ARM_VC;
13478 case UNORDERED: return ARM_VS;
13479 case UNLT: return ARM_LT;
13480 case UNLE: return ARM_LE;
13481 case UNGT: return ARM_HI;
13482 case UNGE: return ARM_PL;
13483 /* UNEQ and LTGT do not have a representation. */
13484 case UNEQ: /* Fall through. */
13485 case LTGT: /* Fall through. */
13486 default: gcc_unreachable ();
13489 case CC_SWPmode:
13490 switch (comp_code)
13492 case NE: return ARM_NE;
13493 case EQ: return ARM_EQ;
13494 case GE: return ARM_LE;
13495 case GT: return ARM_LT;
13496 case LE: return ARM_GE;
13497 case LT: return ARM_GT;
13498 case GEU: return ARM_LS;
13499 case GTU: return ARM_CC;
13500 case LEU: return ARM_CS;
13501 case LTU: return ARM_HI;
13502 default: gcc_unreachable ();
13505 case CC_Cmode:
13506 switch (comp_code)
13508 case LTU: return ARM_CS;
13509 case GEU: return ARM_CC;
13510 default: gcc_unreachable ();
13513 case CCmode:
13514 switch (comp_code)
13516 case NE: return ARM_NE;
13517 case EQ: return ARM_EQ;
13518 case GE: return ARM_GE;
13519 case GT: return ARM_GT;
13520 case LE: return ARM_LE;
13521 case LT: return ARM_LT;
13522 case GEU: return ARM_CS;
13523 case GTU: return ARM_HI;
13524 case LEU: return ARM_LS;
13525 case LTU: return ARM_CC;
13526 default: gcc_unreachable ();
13529 default: gcc_unreachable ();
13533 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13534 instructions. */
13535 void
13536 thumb2_final_prescan_insn (rtx insn)
13538 rtx first_insn = insn;
13539 rtx body = PATTERN (insn);
13540 rtx predicate;
13541 enum arm_cond_code code;
13542 int n;
13543 int mask;
13545 /* Remove the previous insn from the count of insns to be output. */
13546 if (arm_condexec_count)
13547 arm_condexec_count--;
13549 /* Nothing to do if we are already inside a conditional block. */
13550 if (arm_condexec_count)
13551 return;
13553 if (GET_CODE (body) != COND_EXEC)
13554 return;
13556 /* Conditional jumps are implemented directly. */
13557 if (GET_CODE (insn) == JUMP_INSN)
13558 return;
13560 predicate = COND_EXEC_TEST (body);
13561 arm_current_cc = get_arm_condition_code (predicate);
13563 n = get_attr_ce_count (insn);
13564 arm_condexec_count = 1;
13565 arm_condexec_mask = (1 << n) - 1;
13566 arm_condexec_masklen = n;
13567 /* See if subsequent instructions can be combined into the same block. */
13568 for (;;)
13570 insn = next_nonnote_insn (insn);
13572 /* Jumping into the middle of an IT block is illegal, so a label or
13573 barrier terminates the block. */
13574 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13575 break;
13577 body = PATTERN (insn);
13578 /* USE and CLOBBER aren't really insns, so just skip them. */
13579 if (GET_CODE (body) == USE
13580 || GET_CODE (body) == CLOBBER)
13581 continue;
13583 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13584 if (GET_CODE (body) != COND_EXEC)
13585 break;
13586 /* Allow up to 4 conditionally executed instructions in a block. */
13587 n = get_attr_ce_count (insn);
13588 if (arm_condexec_masklen + n > 4)
13589 break;
13591 predicate = COND_EXEC_TEST (body);
13592 code = get_arm_condition_code (predicate);
13593 mask = (1 << n) - 1;
13594 if (arm_current_cc == code)
13595 arm_condexec_mask |= (mask << arm_condexec_masklen);
13596 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13597 break;
13599 arm_condexec_count++;
13600 arm_condexec_masklen += n;
13602 /* A jump must be the last instruction in a conditional block. */
13603 if (GET_CODE(insn) == JUMP_INSN)
13604 break;
13606 /* Restore recog_data (getting the attributes of other insns can
13607 destroy this array, but final.c assumes that it remains intact
13608 across this call). */
13609 extract_constrain_insn_cached (first_insn);
13612 void
13613 arm_final_prescan_insn (rtx insn)
13615 /* BODY will hold the body of INSN. */
13616 rtx body = PATTERN (insn);
13618 /* This will be 1 if trying to repeat the trick, and things need to be
13619 reversed if it appears to fail. */
13620 int reverse = 0;
13622 /* JUMP_CLOBBERS being one implies that the condition codes are
13623 clobbered if the branch is taken, even if the rtl suggests otherwise.  It also
13624 means that we have to grub around within the jump expression to find
13625 out what the conditions are when the jump isn't taken. */
13626 int jump_clobbers = 0;
13628 /* If we start with a return insn, we only succeed if we find another one. */
13629 int seeking_return = 0;
13631 /* START_INSN will hold the insn from where we start looking. This is the
13632 first insn after the following code_label if REVERSE is true. */
13633 rtx start_insn = insn;
13635 /* If in state 4, check if the target branch is reached, in order to
13636 change back to state 0. */
13637 if (arm_ccfsm_state == 4)
13639 if (insn == arm_target_insn)
13641 arm_target_insn = NULL;
13642 arm_ccfsm_state = 0;
13644 return;
13647 /* If in state 3, it is possible to repeat the trick, if this insn is an
13648 unconditional branch to a label, and immediately following this branch
13649 is the previous target label which is only used once, and the label this
13650 branch jumps to is not too far off. */
13651 if (arm_ccfsm_state == 3)
13653 if (simplejump_p (insn))
13655 start_insn = next_nonnote_insn (start_insn);
13656 if (GET_CODE (start_insn) == BARRIER)
13658 /* XXX Isn't this always a barrier? */
13659 start_insn = next_nonnote_insn (start_insn);
13661 if (GET_CODE (start_insn) == CODE_LABEL
13662 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13663 && LABEL_NUSES (start_insn) == 1)
13664 reverse = TRUE;
13665 else
13666 return;
13668 else if (GET_CODE (body) == RETURN)
13670 start_insn = next_nonnote_insn (start_insn);
13671 if (GET_CODE (start_insn) == BARRIER)
13672 start_insn = next_nonnote_insn (start_insn);
13673 if (GET_CODE (start_insn) == CODE_LABEL
13674 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13675 && LABEL_NUSES (start_insn) == 1)
13677 reverse = TRUE;
13678 seeking_return = 1;
13680 else
13681 return;
13683 else
13684 return;
13687 gcc_assert (!arm_ccfsm_state || reverse);
13688 if (GET_CODE (insn) != JUMP_INSN)
13689 return;
13691 /* This jump might be paralleled with a clobber of the condition codes;
13692 the jump should always come first.  */
13693 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13694 body = XVECEXP (body, 0, 0);
13696 if (reverse
13697 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13698 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13700 int insns_skipped;
13701 int fail = FALSE, succeed = FALSE;
13702 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13703 int then_not_else = TRUE;
13704 rtx this_insn = start_insn, label = 0;
13706 /* If the jump cannot be done with one instruction, we cannot
13707 conditionally execute the instruction in the inverse case. */
13708 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13710 jump_clobbers = 1;
13711 return;
13714 /* Register the insn jumped to. */
13715 if (reverse)
13717 if (!seeking_return)
13718 label = XEXP (SET_SRC (body), 0);
13720 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13721 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13722 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13724 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13725 then_not_else = FALSE;
13727 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13728 seeking_return = 1;
13729 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13731 seeking_return = 1;
13732 then_not_else = FALSE;
13734 else
13735 gcc_unreachable ();
13737 /* See how many insns this branch skips, and what kind of insns. If all
13738 insns are okay, and the label or unconditional branch to the same
13739 label is not too far away, succeed. */
13740 for (insns_skipped = 0;
13741 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13743 rtx scanbody;
13745 this_insn = next_nonnote_insn (this_insn);
13746 if (!this_insn)
13747 break;
13749 switch (GET_CODE (this_insn))
13751 case CODE_LABEL:
13752 /* Succeed if it is the target label, otherwise fail since
13753 control falls in from somewhere else. */
13754 if (this_insn == label)
13756 if (jump_clobbers)
13758 arm_ccfsm_state = 2;
13759 this_insn = next_nonnote_insn (this_insn);
13761 else
13762 arm_ccfsm_state = 1;
13763 succeed = TRUE;
13765 else
13766 fail = TRUE;
13767 break;
13769 case BARRIER:
13770 /* Succeed if the following insn is the target label.
13771 Otherwise fail.
13772 If return insns are used then the last insn in a function
13773 will be a barrier. */
13774 this_insn = next_nonnote_insn (this_insn);
13775 if (this_insn && this_insn == label)
13777 if (jump_clobbers)
13779 arm_ccfsm_state = 2;
13780 this_insn = next_nonnote_insn (this_insn);
13782 else
13783 arm_ccfsm_state = 1;
13784 succeed = TRUE;
13786 else
13787 fail = TRUE;
13788 break;
13790 case CALL_INSN:
13791 /* The AAPCS says that conditional calls should not be
13792 used since they make interworking inefficient (the
13793 linker can't transform BL<cond> into BLX). That's
13794 only a problem if the machine has BLX. */
13795 if (arm_arch5)
13797 fail = TRUE;
13798 break;
13801 /* Succeed if the following insn is the target label, or
13802 if the following two insns are a barrier and the
13803 target label. */
13804 this_insn = next_nonnote_insn (this_insn);
13805 if (this_insn && GET_CODE (this_insn) == BARRIER)
13806 this_insn = next_nonnote_insn (this_insn);
13808 if (this_insn && this_insn == label
13809 && insns_skipped < max_insns_skipped)
13811 if (jump_clobbers)
13813 arm_ccfsm_state = 2;
13814 this_insn = next_nonnote_insn (this_insn);
13816 else
13817 arm_ccfsm_state = 1;
13818 succeed = TRUE;
13820 else
13821 fail = TRUE;
13822 break;
13824 case JUMP_INSN:
13825 /* If this is an unconditional branch to the same label, succeed.
13826 If it is to another label, do nothing. If it is conditional,
13827 fail. */
13828 /* XXX Probably, the tests for SET and the PC are
13829 unnecessary. */
13831 scanbody = PATTERN (this_insn);
13832 if (GET_CODE (scanbody) == SET
13833 && GET_CODE (SET_DEST (scanbody)) == PC)
13835 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13836 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13838 arm_ccfsm_state = 2;
13839 succeed = TRUE;
13841 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13842 fail = TRUE;
13844 /* Fail if a conditional return is undesirable (e.g. on a
13845 StrongARM), but still allow this if optimizing for size. */
13846 else if (GET_CODE (scanbody) == RETURN
13847 && !use_return_insn (TRUE, NULL)
13848 && !optimize_size)
13849 fail = TRUE;
13850 else if (GET_CODE (scanbody) == RETURN
13851 && seeking_return)
13853 arm_ccfsm_state = 2;
13854 succeed = TRUE;
13856 else if (GET_CODE (scanbody) == PARALLEL)
13858 switch (get_attr_conds (this_insn))
13860 case CONDS_NOCOND:
13861 break;
13862 default:
13863 fail = TRUE;
13864 break;
13867 else
13868 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13870 break;
13872 case INSN:
13873 /* Instructions using or affecting the condition codes make it
13874 fail. */
13875 scanbody = PATTERN (this_insn);
13876 if (!(GET_CODE (scanbody) == SET
13877 || GET_CODE (scanbody) == PARALLEL)
13878 || get_attr_conds (this_insn) != CONDS_NOCOND)
13879 fail = TRUE;
13881 /* A conditional cirrus instruction must be followed by
13882 a non Cirrus instruction. However, since we
13883 conditionalize instructions in this function and by
13884 the time we get here we can't add instructions
13885 (nops), because shorten_branches() has already been
13886 called, we will disable conditionalizing Cirrus
13887 instructions to be safe. */
13888 if (GET_CODE (scanbody) != USE
13889 && GET_CODE (scanbody) != CLOBBER
13890 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13891 fail = TRUE;
13892 break;
13894 default:
13895 break;
13898 if (succeed)
13900 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13901 arm_target_label = CODE_LABEL_NUMBER (label);
13902 else
13904 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13906 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13908 this_insn = next_nonnote_insn (this_insn);
13909 gcc_assert (!this_insn
13910 || (GET_CODE (this_insn) != BARRIER
13911 && GET_CODE (this_insn) != CODE_LABEL));
13913 if (!this_insn)
13915 /* Oh dear!  We ran off the end; give up. */
13916 extract_constrain_insn_cached (insn);
13917 arm_ccfsm_state = 0;
13918 arm_target_insn = NULL;
13919 return;
13921 arm_target_insn = this_insn;
13923 if (jump_clobbers)
13925 gcc_assert (!reverse);
13926 arm_current_cc =
13927 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13928 0), 0), 1));
13929 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13930 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13931 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13932 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13934 else
13936 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13937 what it was. */
13938 if (!reverse)
13939 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13940 0));
13943 if (reverse || then_not_else)
13944 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13947 /* Restore recog_data (getting the attributes of other insns can
13948 destroy this array, but final.c assumes that it remains intact
13949 across this call). */
13950 extract_constrain_insn_cached (insn);
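/* Illustrative sketch of the transformation the state machine above
   enables (the registers, label and condition are hypothetical, not
   taken from real compiler output).  A conditional branch that skips
   a single instruction:

	cmp	r3, #0
	beq	.L1
	add	r0, r0, #1
   .L1:

   can instead be emitted with the skipped instruction predicated on
   the inverse condition, so the branch disappears:

	cmp	r3, #0
	addne	r0, r0, #1  */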
13954 /* Output IT instructions. */
13955 void
13956 thumb2_asm_output_opcode (FILE * stream)
13958 char buff[5];
13959 int n;
13961 if (arm_condexec_mask)
13963 for (n = 0; n < arm_condexec_masklen; n++)
13964 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13965 buff[n] = 0;
13966 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13967 arm_condition_codes[arm_current_cc]);
13968 arm_condexec_mask = 0;
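/* For example (values are illustrative): with arm_condexec_masklen == 2,
   bit 0 of arm_condexec_mask set, bit 1 clear, and arm_current_cc ==
   ARM_EQ, the code above emits "ite	eq" in front of the first
   instruction of the conditional block.  */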
13972 /* Returns true if REGNO is a valid register
13973 for holding a quantity of type MODE. */
13975 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13977 if (GET_MODE_CLASS (mode) == MODE_CC)
13978 return (regno == CC_REGNUM
13979 || (TARGET_HARD_FLOAT && TARGET_VFP
13980 && regno == VFPCC_REGNUM));
13982 if (TARGET_THUMB1)
13983 /* For the Thumb we only allow values bigger than SImode in
13984 registers 0 - 6, so that there is always a second low
13985 register available to hold the upper part of the value.
13986 We probably ought to ensure that the register is the
13987 start of an even-numbered register pair. */
13988 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13990 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13991 && IS_CIRRUS_REGNUM (regno))
13992 /* We have outlawed SI values in Cirrus registers because they
13993 reside in the lower 32 bits, but SF values reside in the
13994 upper 32 bits. This causes gcc all sorts of grief. We can't
13995 even split the registers into pairs because Cirrus SI values
13996 get sign-extended to 64 bits -- aldyh. */
13997 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13999 if (TARGET_HARD_FLOAT && TARGET_VFP
14000 && IS_VFP_REGNUM (regno))
14002 if (mode == SFmode || mode == SImode)
14003 return VFP_REGNO_OK_FOR_SINGLE (regno);
14005 if (mode == DFmode)
14006 return VFP_REGNO_OK_FOR_DOUBLE (regno);
14008 if (TARGET_NEON)
14009 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
14010 || (VALID_NEON_QREG_MODE (mode)
14011 && NEON_REGNO_OK_FOR_QUAD (regno))
14012 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
14013 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
14014 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
14015 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
14016 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
14018 return FALSE;
14021 if (TARGET_REALLY_IWMMXT)
14023 if (IS_IWMMXT_GR_REGNUM (regno))
14024 return mode == SImode;
14026 if (IS_IWMMXT_REGNUM (regno))
14027 return VALID_IWMMXT_REG_MODE (mode);
14030 /* We allow any value to be stored in the general registers.
14031 Restrict doubleword quantities to even register pairs so that we can
14032 use ldrd. Do not allow Neon structure opaque modes in general registers;
14033 they would use too many. */
14034 if (regno <= LAST_ARM_REGNUM)
14035 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
14036 && !VALID_NEON_STRUCT_MODE (mode);
14038 if (regno == FRAME_POINTER_REGNUM
14039 || regno == ARG_POINTER_REGNUM)
14040 /* We only allow integers in the fake hard registers. */
14041 return GET_MODE_CLASS (mode) == MODE_INT;
14043 /* The only registers left are the FPA registers
14044 which we only allow to hold FP values. */
14045 return (TARGET_HARD_FLOAT && TARGET_FPA
14046 && GET_MODE_CLASS (mode) == MODE_FLOAT
14047 && regno >= FIRST_FPA_REGNUM
14048 && regno <= LAST_FPA_REGNUM);
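/* Two illustrative consequences of the checks above (examples only,
   not an exhaustive specification): on a Thumb-1 target a DImode
   value is allowed in r0-r6 but not in r7, since no second low
   register would remain for the upper word; with VFP enabled an
   SFmode or SImode value is accepted in any register passing
   VFP_REGNO_OK_FOR_SINGLE, while DFmode must pass
   VFP_REGNO_OK_FOR_DOUBLE.  */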
14051 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
14052 not used in ARM mode. */
14054 arm_regno_class (int regno)
14056 if (TARGET_THUMB1)
14058 if (regno == STACK_POINTER_REGNUM)
14059 return STACK_REG;
14060 if (regno == CC_REGNUM)
14061 return CC_REG;
14062 if (regno < 8)
14063 return LO_REGS;
14064 return HI_REGS;
14067 if (TARGET_THUMB2 && regno < 8)
14068 return LO_REGS;
14070 if ( regno <= LAST_ARM_REGNUM
14071 || regno == FRAME_POINTER_REGNUM
14072 || regno == ARG_POINTER_REGNUM)
14073 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
14075 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
14076 return TARGET_THUMB2 ? CC_REG : NO_REGS;
14078 if (IS_CIRRUS_REGNUM (regno))
14079 return CIRRUS_REGS;
14081 if (IS_VFP_REGNUM (regno))
14083 if (regno <= D7_VFP_REGNUM)
14084 return VFP_D0_D7_REGS;
14085 else if (regno <= LAST_LO_VFP_REGNUM)
14086 return VFP_LO_REGS;
14087 else
14088 return VFP_HI_REGS;
14091 if (IS_IWMMXT_REGNUM (regno))
14092 return IWMMXT_REGS;
14094 if (IS_IWMMXT_GR_REGNUM (regno))
14095 return IWMMXT_GR_REGS;
14097 return FPA_REGS;
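/* Illustrative mapping for a few register numbers, derived from the
   tests above: in Thumb-1, r0-r7 give LO_REGS, sp gives STACK_REG,
   the condition-code register gives CC_REG and the remaining core
   registers give HI_REGS; in ARM mode r0-r15 and the fake frame/arg
   pointer registers all give GENERAL_REGS.  */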
14100 /* Handle a special case when computing the offset
14101 of an argument from the frame pointer. */
14103 arm_debugger_arg_offset (int value, rtx addr)
14105 rtx insn;
14107 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
14108 if (value != 0)
14109 return 0;
14111 /* We can only cope with the case where the address is held in a register. */
14112 if (GET_CODE (addr) != REG)
14113 return 0;
14115 /* If we are using the frame pointer to point at the argument, then
14116 an offset of 0 is correct. */
14117 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
14118 return 0;
14120 /* If we are using the stack pointer to point at the
14121 argument, then an offset of 0 is correct. */
14122 /* ??? Check this is consistent with thumb2 frame layout. */
14123 if ((TARGET_THUMB || !frame_pointer_needed)
14124 && REGNO (addr) == SP_REGNUM)
14125 return 0;
14127 /* Oh dear. The argument is pointed to by a register rather
14128 than being held in a register, or being stored at a known
14129 offset from the frame pointer. Since GDB only understands
14130 those two kinds of argument we must translate the address
14131 held in the register into an offset from the frame pointer.
14132 We do this by searching through the insns for the function
14133 looking to see where this register gets its value. If the
14134 register is initialized from the frame pointer plus an offset
14135 then we are in luck and we can continue, otherwise we give up.
14137 This code is exercised by producing debugging information
14138 for a function with arguments like this:
14140 double func (double a, double b, int c, double d) {return d;}
14142 Without this code the stab for parameter 'd' will be set to
14143 an offset of 0 from the frame pointer, rather than 8. */
14145 /* The if() statement says:
14147 If the insn is a normal instruction
14148 and if the insn is setting the value in a register
14149 and if the register being set is the register holding the address of the argument
14150 and if the address is computed by an addition
14151 that involves adding to a register
14152 which is the frame pointer
14153 a constant integer
14155 then... */
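/* In RTL terms (dump syntax simplified, register number and offset
   illustrative), the loop below is looking for an insn whose pattern
   has the shape

	(set (reg Rn)
	     (plus (reg hard-frame-pointer) (const_int 8)))  */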
14157 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14159 if ( GET_CODE (insn) == INSN
14160 && GET_CODE (PATTERN (insn)) == SET
14161 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
14162 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
14163 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
14164 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14165 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14168 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14170 break;
14174 if (value == 0)
14176 debug_rtx (addr);
14177 warning (0, "unable to compute real location of stacked parameter");
14178 value = 8; /* XXX magic hack */
14181 return value;
14184 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14185 do \
14187 if ((MASK) & insn_flags) \
14188 add_builtin_function ((NAME), (TYPE), (CODE), \
14189 BUILT_IN_MD, NULL, NULL_TREE); \
14191 while (0)
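/* Illustrative use of the macro above (this exact call appears in
   arm_init_iwmmxt_builtins below):

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
		   ARM_BUILTIN_WZERO);

   i.e. the builtin is registered only when the corresponding feature
   flag is present in insn_flags.  */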
14193 struct builtin_description
14195 const unsigned int mask;
14196 const enum insn_code icode;
14197 const char * const name;
14198 const enum arm_builtins code;
14199 const enum rtx_code comparison;
14200 const unsigned int flag;
14203 static const struct builtin_description bdesc_2arg[] =
14205 #define IWMMXT_BUILTIN(code, string, builtin) \
14206 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14207 ARM_BUILTIN_##builtin, 0, 0 },
14209 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14210 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14211 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14212 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14213 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14214 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14215 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14216 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14217 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14218 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14219 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14220 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14221 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14222 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14223 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14224 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14225 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14226 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14227 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14228 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14229 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14230 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14231 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14232 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14233 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14234 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14235 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14236 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14237 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14238 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14239 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14240 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14241 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14242 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14243 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14244 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14245 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14246 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14247 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14248 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14249 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14250 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14251 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14252 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14253 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14254 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14255 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14256 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14257 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14258 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14259 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14260 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14261 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14262 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14263 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14264 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14265 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14266 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14268 #define IWMMXT_BUILTIN2(code, builtin) \
14269 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14271 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14272 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14273 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14274 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14275 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14276 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14277 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14278 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14279 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14280 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14281 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14282 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14283 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14284 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14285 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14286 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14287 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14288 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14289 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14290 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14291 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14292 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14293 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14294 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14295 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14296 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14297 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14298 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14299 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14300 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14301 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14302 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
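/* For reference, the first entry of bdesc_2arg above expands (via the
   IWMMXT_BUILTIN macro) into a builtin_description of the form

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, 0, 0 }

   so each row ties a user-visible builtin name to the insn pattern
   that implements it.  */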
14305 static const struct builtin_description bdesc_1arg[] =
14307 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14308 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14309 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14310 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14311 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14312 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14313 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14314 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14315 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14316 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14317 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14318 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14319 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14320 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14321 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14322 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14323 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14324 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14327 /* Set up all the iWMMXt builtins. This is
14328 not called if TARGET_IWMMXT is zero. */
14330 static void
14331 arm_init_iwmmxt_builtins (void)
14333 const struct builtin_description * d;
14334 size_t i;
14335 tree endlink = void_list_node;
14337 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14338 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14339 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14341 tree int_ftype_int
14342 = build_function_type (integer_type_node,
14343 tree_cons (NULL_TREE, integer_type_node, endlink));
14344 tree v8qi_ftype_v8qi_v8qi_int
14345 = build_function_type (V8QI_type_node,
14346 tree_cons (NULL_TREE, V8QI_type_node,
14347 tree_cons (NULL_TREE, V8QI_type_node,
14348 tree_cons (NULL_TREE,
14349 integer_type_node,
14350 endlink))));
14351 tree v4hi_ftype_v4hi_int
14352 = build_function_type (V4HI_type_node,
14353 tree_cons (NULL_TREE, V4HI_type_node,
14354 tree_cons (NULL_TREE, integer_type_node,
14355 endlink)));
14356 tree v2si_ftype_v2si_int
14357 = build_function_type (V2SI_type_node,
14358 tree_cons (NULL_TREE, V2SI_type_node,
14359 tree_cons (NULL_TREE, integer_type_node,
14360 endlink)));
14361 tree v2si_ftype_di_di
14362 = build_function_type (V2SI_type_node,
14363 tree_cons (NULL_TREE, long_long_integer_type_node,
14364 tree_cons (NULL_TREE, long_long_integer_type_node,
14365 endlink)));
14366 tree di_ftype_di_int
14367 = build_function_type (long_long_integer_type_node,
14368 tree_cons (NULL_TREE, long_long_integer_type_node,
14369 tree_cons (NULL_TREE, integer_type_node,
14370 endlink)));
14371 tree di_ftype_di_int_int
14372 = build_function_type (long_long_integer_type_node,
14373 tree_cons (NULL_TREE, long_long_integer_type_node,
14374 tree_cons (NULL_TREE, integer_type_node,
14375 tree_cons (NULL_TREE,
14376 integer_type_node,
14377 endlink))));
14378 tree int_ftype_v8qi
14379 = build_function_type (integer_type_node,
14380 tree_cons (NULL_TREE, V8QI_type_node,
14381 endlink));
14382 tree int_ftype_v4hi
14383 = build_function_type (integer_type_node,
14384 tree_cons (NULL_TREE, V4HI_type_node,
14385 endlink));
14386 tree int_ftype_v2si
14387 = build_function_type (integer_type_node,
14388 tree_cons (NULL_TREE, V2SI_type_node,
14389 endlink));
14390 tree int_ftype_v8qi_int
14391 = build_function_type (integer_type_node,
14392 tree_cons (NULL_TREE, V8QI_type_node,
14393 tree_cons (NULL_TREE, integer_type_node,
14394 endlink)));
14395 tree int_ftype_v4hi_int
14396 = build_function_type (integer_type_node,
14397 tree_cons (NULL_TREE, V4HI_type_node,
14398 tree_cons (NULL_TREE, integer_type_node,
14399 endlink)));
14400 tree int_ftype_v2si_int
14401 = build_function_type (integer_type_node,
14402 tree_cons (NULL_TREE, V2SI_type_node,
14403 tree_cons (NULL_TREE, integer_type_node,
14404 endlink)));
14405 tree v8qi_ftype_v8qi_int_int
14406 = build_function_type (V8QI_type_node,
14407 tree_cons (NULL_TREE, V8QI_type_node,
14408 tree_cons (NULL_TREE, integer_type_node,
14409 tree_cons (NULL_TREE,
14410 integer_type_node,
14411 endlink))));
14412 tree v4hi_ftype_v4hi_int_int
14413 = build_function_type (V4HI_type_node,
14414 tree_cons (NULL_TREE, V4HI_type_node,
14415 tree_cons (NULL_TREE, integer_type_node,
14416 tree_cons (NULL_TREE,
14417 integer_type_node,
14418 endlink))));
14419 tree v2si_ftype_v2si_int_int
14420 = build_function_type (V2SI_type_node,
14421 tree_cons (NULL_TREE, V2SI_type_node,
14422 tree_cons (NULL_TREE, integer_type_node,
14423 tree_cons (NULL_TREE,
14424 integer_type_node,
14425 endlink))));
14426 /* Miscellaneous. */
14427 tree v8qi_ftype_v4hi_v4hi
14428 = build_function_type (V8QI_type_node,
14429 tree_cons (NULL_TREE, V4HI_type_node,
14430 tree_cons (NULL_TREE, V4HI_type_node,
14431 endlink)));
14432 tree v4hi_ftype_v2si_v2si
14433 = build_function_type (V4HI_type_node,
14434 tree_cons (NULL_TREE, V2SI_type_node,
14435 tree_cons (NULL_TREE, V2SI_type_node,
14436 endlink)));
14437 tree v2si_ftype_v4hi_v4hi
14438 = build_function_type (V2SI_type_node,
14439 tree_cons (NULL_TREE, V4HI_type_node,
14440 tree_cons (NULL_TREE, V4HI_type_node,
14441 endlink)));
14442 tree v2si_ftype_v8qi_v8qi
14443 = build_function_type (V2SI_type_node,
14444 tree_cons (NULL_TREE, V8QI_type_node,
14445 tree_cons (NULL_TREE, V8QI_type_node,
14446 endlink)));
14447 tree v4hi_ftype_v4hi_di
14448 = build_function_type (V4HI_type_node,
14449 tree_cons (NULL_TREE, V4HI_type_node,
14450 tree_cons (NULL_TREE,
14451 long_long_integer_type_node,
14452 endlink)));
14453 tree v2si_ftype_v2si_di
14454 = build_function_type (V2SI_type_node,
14455 tree_cons (NULL_TREE, V2SI_type_node,
14456 tree_cons (NULL_TREE,
14457 long_long_integer_type_node,
14458 endlink)));
14459 tree void_ftype_int_int
14460 = build_function_type (void_type_node,
14461 tree_cons (NULL_TREE, integer_type_node,
14462 tree_cons (NULL_TREE, integer_type_node,
14463 endlink)));
14464 tree di_ftype_void
14465 = build_function_type (long_long_unsigned_type_node, endlink);
14466 tree di_ftype_v8qi
14467 = build_function_type (long_long_integer_type_node,
14468 tree_cons (NULL_TREE, V8QI_type_node,
14469 endlink));
14470 tree di_ftype_v4hi
14471 = build_function_type (long_long_integer_type_node,
14472 tree_cons (NULL_TREE, V4HI_type_node,
14473 endlink));
14474 tree di_ftype_v2si
14475 = build_function_type (long_long_integer_type_node,
14476 tree_cons (NULL_TREE, V2SI_type_node,
14477 endlink));
14478 tree v2si_ftype_v4hi
14479 = build_function_type (V2SI_type_node,
14480 tree_cons (NULL_TREE, V4HI_type_node,
14481 endlink));
14482 tree v4hi_ftype_v8qi
14483 = build_function_type (V4HI_type_node,
14484 tree_cons (NULL_TREE, V8QI_type_node,
14485 endlink));
14487 tree di_ftype_di_v4hi_v4hi
14488 = build_function_type (long_long_unsigned_type_node,
14489 tree_cons (NULL_TREE,
14490 long_long_unsigned_type_node,
14491 tree_cons (NULL_TREE, V4HI_type_node,
14492 tree_cons (NULL_TREE,
14493 V4HI_type_node,
14494 endlink))));
14496 tree di_ftype_v4hi_v4hi
14497 = build_function_type (long_long_unsigned_type_node,
14498 tree_cons (NULL_TREE, V4HI_type_node,
14499 tree_cons (NULL_TREE, V4HI_type_node,
14500 endlink)));
14502 /* Normal vector binops. */
14503 tree v8qi_ftype_v8qi_v8qi
14504 = build_function_type (V8QI_type_node,
14505 tree_cons (NULL_TREE, V8QI_type_node,
14506 tree_cons (NULL_TREE, V8QI_type_node,
14507 endlink)));
14508 tree v4hi_ftype_v4hi_v4hi
14509 = build_function_type (V4HI_type_node,
14510 tree_cons (NULL_TREE, V4HI_type_node,
14511 tree_cons (NULL_TREE, V4HI_type_node,
14512 endlink)));
14513 tree v2si_ftype_v2si_v2si
14514 = build_function_type (V2SI_type_node,
14515 tree_cons (NULL_TREE, V2SI_type_node,
14516 tree_cons (NULL_TREE, V2SI_type_node,
14517 endlink)));
14518 tree di_ftype_di_di
14519 = build_function_type (long_long_unsigned_type_node,
14520 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14521 tree_cons (NULL_TREE,
14522 long_long_unsigned_type_node,
14523 endlink)));
14525 /* Add all builtins that are more or less simple operations on two
14526 operands. */
14527 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14529 /* Use one of the operands; the target can have a different mode for
14530 mask-generating compares. */
14531 enum machine_mode mode;
14532 tree type;
14534 if (d->name == 0)
14535 continue;
14537 mode = insn_data[d->icode].operand[1].mode;
14539 switch (mode)
14541 case V8QImode:
14542 type = v8qi_ftype_v8qi_v8qi;
14543 break;
14544 case V4HImode:
14545 type = v4hi_ftype_v4hi_v4hi;
14546 break;
14547 case V2SImode:
14548 type = v2si_ftype_v2si_v2si;
14549 break;
14550 case DImode:
14551 type = di_ftype_di_di;
14552 break;
14554 default:
14555 gcc_unreachable ();
14558 def_mbuiltin (d->mask, d->name, type, d->code);
14561 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
14562 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14563 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14564 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14566 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14567 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14568 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14569 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14570 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14571 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14573 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14574 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14575 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14576 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14577 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14578 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14580 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14581 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14582 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14583 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14584 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14585 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14587 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14588 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14589 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14590 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14591 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14592 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14594 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14596 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14597 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14598 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14599 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14601 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14602 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14603 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14604 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14605 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14606 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14607 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14608 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14609 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14611 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14612 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14613 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14615 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14616 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14617 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14619 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14620 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14621 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14622 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14623 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14624 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14626 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14627 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14628 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14629 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14630 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14631 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14632 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14633 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14634 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14635 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14636 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14637 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14639 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14640 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14641 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14642 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14644 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14645 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14646 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14647 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14648 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14649 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14650 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14653 static void
14654 arm_init_tls_builtins (void)
14656 tree ftype, decl;
14658 ftype = build_function_type (ptr_type_node, void_list_node);
14659 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14660 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14661 NULL, NULL_TREE);
14662 TREE_NOTHROW (decl) = 1;
14663 TREE_READONLY (decl) = 1;
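/* Illustrative use of the builtin registered above (a sketch):

     void *tp = __builtin_thread_pointer ();

   The function type built above (ptr_type_node with no arguments) is
   what makes such a call well-formed.  */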
14666 typedef enum {
14667 T_V8QI = 0x0001,
14668 T_V4HI = 0x0002,
14669 T_V2SI = 0x0004,
14670 T_V2SF = 0x0008,
14671 T_DI = 0x0010,
14672 T_V16QI = 0x0020,
14673 T_V8HI = 0x0040,
14674 T_V4SI = 0x0080,
14675 T_V4SF = 0x0100,
14676 T_V2DI = 0x0200,
14677 T_TI = 0x0400,
14678 T_EI = 0x0800,
14679 T_OI = 0x1000
14680 } neon_builtin_type_bits;
14682 #define v8qi_UP T_V8QI
14683 #define v4hi_UP T_V4HI
14684 #define v2si_UP T_V2SI
14685 #define v2sf_UP T_V2SF
14686 #define di_UP T_DI
14687 #define v16qi_UP T_V16QI
14688 #define v8hi_UP T_V8HI
14689 #define v4si_UP T_V4SI
14690 #define v4sf_UP T_V4SF
14691 #define v2di_UP T_V2DI
14692 #define ti_UP T_TI
14693 #define ei_UP T_EI
14694 #define oi_UP T_OI
14696 #define UP(X) X##_UP
14698 #define T_MAX 13
14700 typedef enum {
14701 NEON_BINOP,
14702 NEON_TERNOP,
14703 NEON_UNOP,
14704 NEON_GETLANE,
14705 NEON_SETLANE,
14706 NEON_CREATE,
14707 NEON_DUP,
14708 NEON_DUPLANE,
14709 NEON_COMBINE,
14710 NEON_SPLIT,
14711 NEON_LANEMUL,
14712 NEON_LANEMULL,
14713 NEON_LANEMULH,
14714 NEON_LANEMAC,
14715 NEON_SCALARMUL,
14716 NEON_SCALARMULL,
14717 NEON_SCALARMULH,
14718 NEON_SCALARMAC,
14719 NEON_CONVERT,
14720 NEON_FIXCONV,
14721 NEON_SELECT,
14722 NEON_RESULTPAIR,
14723 NEON_REINTERP,
14724 NEON_VTBL,
14725 NEON_VTBX,
14726 NEON_LOAD1,
14727 NEON_LOAD1LANE,
14728 NEON_STORE1,
14729 NEON_STORE1LANE,
14730 NEON_LOADSTRUCT,
14731 NEON_LOADSTRUCTLANE,
14732 NEON_STORESTRUCT,
14733 NEON_STORESTRUCTLANE,
14734 NEON_LOGICBINOP,
14735 NEON_SHIFTINSERT,
14736 NEON_SHIFTIMM,
14737 NEON_SHIFTACC
14738 } neon_itype;
14740 typedef struct {
14741 const char *name;
14742 const neon_itype itype;
14743 const neon_builtin_type_bits bits;
14744 const enum insn_code codes[T_MAX];
14745 const unsigned int num_vars;
14746 unsigned int base_fcode;
14747 } neon_builtin_datum;
14749 #define CF(N,X) CODE_FOR_neon_##N##X
14751 #define VAR1(T, N, A) \
14752 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14753 #define VAR2(T, N, A, B) \
14754 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14755 #define VAR3(T, N, A, B, C) \
14756 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14757 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14758 #define VAR4(T, N, A, B, C, D) \
14759 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14760 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14761 #define VAR5(T, N, A, B, C, D, E) \
14762 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14763 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14764 #define VAR6(T, N, A, B, C, D, E, F) \
14765 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14766 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14767 #define VAR7(T, N, A, B, C, D, E, F, G) \
14768 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14769 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14770 CF (N, G) }, 7, 0
14771 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14772 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14773 | UP (H), \
14774 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14775 CF (N, G), CF (N, H) }, 8, 0
14776 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14777 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14778 | UP (H) | UP (I), \
14779 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14780 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14781 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14782 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14783 | UP (H) | UP (I) | UP (J), \
14784 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14785 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
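/* As an illustration of the VAR macros, the table entry written below
   as VAR2 (BINOP, vcage, v2sf, v4sf) expands into the
   neon_builtin_datum initializer

     "vcage", NEON_BINOP, T_V2SF | T_V4SF,
     { CODE_FOR_neon_vcagev2sf, CODE_FOR_neon_vcagev4sf }, 2, 0

   i.e. one row describes every mode variant of a single intrinsic.  */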
14787 /* The mode entries in the following table correspond to the "key" type of the
14788 instruction variant, i.e. equivalent to that which would be specified after
14789 the assembler mnemonic, which usually refers to the last vector operand.
14790 (Signed/unsigned/polynomial types are not differentiated between though, and
14791 are all mapped onto the same mode for a given element size.) The modes
14792 listed per instruction should be the same as those defined for that
14793 instruction's pattern in neon.md.
14794 WARNING: Variants should be listed in the same increasing order as
14795 neon_builtin_type_bits. */
14797 static neon_builtin_datum neon_builtin_data[] =
14799 { VAR10 (BINOP, vadd,
14800 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14801 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14802 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14803 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14804 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14805 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14806 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14807 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14808 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14809 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14810 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14811 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14812 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14813 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14814 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14815 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14816 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14817 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14818 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14819 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14820 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14821 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14822 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14823 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14824 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14825 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14826 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14827 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14828 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14829 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14830 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14831 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14832 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14833 { VAR10 (BINOP, vsub,
14834 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14835 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14836 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14837 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14838 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14839 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14840 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14841 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14842 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14843 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14844 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14845 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14846 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14847 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14848 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14849 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14850 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14851 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14852 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14853 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14854 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14855 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14856 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14857 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14858 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14859 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14860 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14861 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14862 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14863 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14864 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14865 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14866 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14867 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14868 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14869 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14870 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14871 /* FIXME: vget_lane supports more variants than this! */
14872 { VAR10 (GETLANE, vget_lane,
14873 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14874 { VAR10 (SETLANE, vset_lane,
14875 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14876 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14877 { VAR10 (DUP, vdup_n,
14878 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14879 { VAR10 (DUPLANE, vdup_lane,
14880 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14881 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14882 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14883 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14884 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14885 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14886 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14887 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14888 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14889 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14890 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14891 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14892 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14893 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14894 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14895 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14896 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14897 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14898 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14899 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14900 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14901 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14902 { VAR10 (BINOP, vext,
14903 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14904 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14905 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14906 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14907 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14908 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14909 { VAR10 (SELECT, vbsl,
14910 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14911 { VAR1 (VTBL, vtbl1, v8qi) },
14912 { VAR1 (VTBL, vtbl2, v8qi) },
14913 { VAR1 (VTBL, vtbl3, v8qi) },
14914 { VAR1 (VTBL, vtbl4, v8qi) },
14915 { VAR1 (VTBX, vtbx1, v8qi) },
14916 { VAR1 (VTBX, vtbx2, v8qi) },
14917 { VAR1 (VTBX, vtbx3, v8qi) },
14918 { VAR1 (VTBX, vtbx4, v8qi) },
14919 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14920 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14921 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14922 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14923 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14924 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14925 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14926 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14927 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14928 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14929 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14930 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14931 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14932 { VAR10 (LOAD1, vld1,
14933 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14934 { VAR10 (LOAD1LANE, vld1_lane,
14935 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14936 { VAR10 (LOAD1, vld1_dup,
14937 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14938 { VAR10 (STORE1, vst1,
14939 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14940 { VAR10 (STORE1LANE, vst1_lane,
14941 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14942 { VAR9 (LOADSTRUCT,
14943 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14944 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14945 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14946 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14947 { VAR9 (STORESTRUCT, vst2,
14948 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14949 { VAR7 (STORESTRUCTLANE, vst2_lane,
14950 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14951 { VAR9 (LOADSTRUCT,
14952 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14953 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14954 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14955 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14956 { VAR9 (STORESTRUCT, vst3,
14957 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14958 { VAR7 (STORESTRUCTLANE, vst3_lane,
14959 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14960 { VAR9 (LOADSTRUCT, vld4,
14961 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14962 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14963 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14964 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14965 { VAR9 (STORESTRUCT, vst4,
14966 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14967 { VAR7 (STORESTRUCTLANE, vst4_lane,
14968 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14969 { VAR10 (LOGICBINOP, vand,
14970 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14971 { VAR10 (LOGICBINOP, vorr,
14972 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14973 { VAR10 (BINOP, veor,
14974 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14975 { VAR10 (LOGICBINOP, vbic,
14976 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14977 { VAR10 (LOGICBINOP, vorn,
14978 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14981 #undef CF
14982 #undef VAR1
14983 #undef VAR2
14984 #undef VAR3
14985 #undef VAR4
14986 #undef VAR5
14987 #undef VAR6
14988 #undef VAR7
14989 #undef VAR8
14990 #undef VAR9
14991 #undef VAR10
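/* At the user level these builtins sit behind the intrinsics in
   arm_neon.h; as an illustrative example (not checked against that
   header here), something like

     #include <arm_neon.h>
     int8x8_t f (int8x8_t a, int8x8_t b) { return vadd_s8 (a, b); }

   is expected to end up using the v8qi variant of the "vadd" entry in
   the table above.  */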
14993 static void
14994 arm_init_neon_builtins (void)
14996 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14998 /* Create distinguished type nodes for NEON vector element types,
14999 and pointers to values of such types, so we can detect them later. */
15000 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15001 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15002 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
15003 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
15004 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
15005 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
15006 tree neon_float_type_node = make_node (REAL_TYPE);
15008 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
15009 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
15010 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
15011 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
15012 tree float_pointer_node = build_pointer_type (neon_float_type_node);
15014 /* Next create constant-qualified versions of the above types. */
15015 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
15016 TYPE_QUAL_CONST);
15017 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
15018 TYPE_QUAL_CONST);
15019 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
15020 TYPE_QUAL_CONST);
15021 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
15022 TYPE_QUAL_CONST);
15023 tree const_float_node = build_qualified_type (neon_float_type_node,
15024 TYPE_QUAL_CONST);
15026 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
15027 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
15028 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
15029 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
15030 tree const_float_pointer_node = build_pointer_type (const_float_node);
15032 /* Now create vector types based on our NEON element types. */
15033 /* 64-bit vectors. */
15034 tree V8QI_type_node =
15035 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
15036 tree V4HI_type_node =
15037 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
15038 tree V2SI_type_node =
15039 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
15040 tree V2SF_type_node =
15041 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
15042 /* 128-bit vectors. */
15043 tree V16QI_type_node =
15044 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
15045 tree V8HI_type_node =
15046 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
15047 tree V4SI_type_node =
15048 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
15049 tree V4SF_type_node =
15050 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
15051 tree V2DI_type_node =
15052 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
15054 /* Unsigned integer types for various mode sizes. */
15055 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
15056 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
15057 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
15058 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
15060 /* Opaque integer types for structures of vectors. */
15061 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
15062 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
15063 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
15064 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
15066 /* Pointers to vector types. */
15067 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
15068 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
15069 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
15070 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
15071 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
15072 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
15073 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
15074 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
15075 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
15077 /* Operations which return results as pairs. */
15078 tree void_ftype_pv8qi_v8qi_v8qi =
15079 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
15080 V8QI_type_node, NULL);
15081 tree void_ftype_pv4hi_v4hi_v4hi =
15082 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
15083 V4HI_type_node, NULL);
15084 tree void_ftype_pv2si_v2si_v2si =
15085 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
15086 V2SI_type_node, NULL);
15087 tree void_ftype_pv2sf_v2sf_v2sf =
15088 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
15089 V2SF_type_node, NULL);
15090 tree void_ftype_pdi_di_di =
15091 build_function_type_list (void_type_node, intDI_pointer_node,
15092 neon_intDI_type_node, neon_intDI_type_node, NULL);
15093 tree void_ftype_pv16qi_v16qi_v16qi =
15094 build_function_type_list (void_type_node, V16QI_pointer_node,
15095 V16QI_type_node, V16QI_type_node, NULL);
15096 tree void_ftype_pv8hi_v8hi_v8hi =
15097 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
15098 V8HI_type_node, NULL);
15099 tree void_ftype_pv4si_v4si_v4si =
15100 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
15101 V4SI_type_node, NULL);
15102 tree void_ftype_pv4sf_v4sf_v4sf =
15103 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
15104 V4SF_type_node, NULL);
15105 tree void_ftype_pv2di_v2di_v2di =
15106 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
15107 V2DI_type_node, NULL);
15109 tree reinterp_ftype_dreg[5][5];
15110 tree reinterp_ftype_qreg[5][5];
15111 tree dreg_types[5], qreg_types[5];
15113 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
15114 layout_type (neon_float_type_node);
15116 /* Define typedefs which exactly correspond to the modes we are basing vector
15117 types on. If you change these names you'll need to change
15118 the table used by arm_mangle_type too. */
15119 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
15120 "__builtin_neon_qi");
15121 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
15122 "__builtin_neon_hi");
15123 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
15124 "__builtin_neon_si");
15125 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
15126 "__builtin_neon_sf");
15127 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
15128 "__builtin_neon_di");
15130 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
15131 "__builtin_neon_poly8");
15132 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
15133 "__builtin_neon_poly16");
15134 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
15135 "__builtin_neon_uqi");
15136 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
15137 "__builtin_neon_uhi");
15138 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
15139 "__builtin_neon_usi");
15140 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
15141 "__builtin_neon_udi");
15143 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
15144 "__builtin_neon_ti");
15145 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
15146 "__builtin_neon_ei");
15147 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
15148 "__builtin_neon_oi");
15149 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
15150 "__builtin_neon_ci");
15151 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
15152 "__builtin_neon_xi");
15154 dreg_types[0] = V8QI_type_node;
15155 dreg_types[1] = V4HI_type_node;
15156 dreg_types[2] = V2SI_type_node;
15157 dreg_types[3] = V2SF_type_node;
15158 dreg_types[4] = neon_intDI_type_node;
15160 qreg_types[0] = V16QI_type_node;
15161 qreg_types[1] = V8HI_type_node;
15162 qreg_types[2] = V4SI_type_node;
15163 qreg_types[3] = V4SF_type_node;
15164 qreg_types[4] = V2DI_type_node;
15166 for (i = 0; i < 5; i++)
15168 int j;
15169 for (j = 0; j < 5; j++)
15171 reinterp_ftype_dreg[i][j]
15172 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15173 reinterp_ftype_qreg[i][j]
15174 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15178 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15180 neon_builtin_datum *d = &neon_builtin_data[i];
15181 unsigned int j, codeidx = 0;
15183 d->base_fcode = fcode;
15185 for (j = 0; j < T_MAX; j++)
15187 const char* const modenames[] = {
15188 "v8qi", "v4hi", "v2si", "v2sf", "di",
15189 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15191 char namebuf[60];
15192 tree ftype = NULL;
15193 enum insn_code icode;
15194 int is_load = 0, is_store = 0;
15196 if ((d->bits & (1 << j)) == 0)
15197 continue;
15199 icode = d->codes[codeidx++];
15201 switch (d->itype)
15203 case NEON_LOAD1:
15204 case NEON_LOAD1LANE:
15205 case NEON_LOADSTRUCT:
15206 case NEON_LOADSTRUCTLANE:
15207 is_load = 1;
15208 /* Fall through. */
15209 case NEON_STORE1:
15210 case NEON_STORE1LANE:
15211 case NEON_STORESTRUCT:
15212 case NEON_STORESTRUCTLANE:
15213 if (!is_load)
15214 is_store = 1;
15215 /* Fall through. */
15216 case NEON_UNOP:
15217 case NEON_BINOP:
15218 case NEON_LOGICBINOP:
15219 case NEON_SHIFTINSERT:
15220 case NEON_TERNOP:
15221 case NEON_GETLANE:
15222 case NEON_SETLANE:
15223 case NEON_CREATE:
15224 case NEON_DUP:
15225 case NEON_DUPLANE:
15226 case NEON_SHIFTIMM:
15227 case NEON_SHIFTACC:
15228 case NEON_COMBINE:
15229 case NEON_SPLIT:
15230 case NEON_CONVERT:
15231 case NEON_FIXCONV:
15232 case NEON_LANEMUL:
15233 case NEON_LANEMULL:
15234 case NEON_LANEMULH:
15235 case NEON_LANEMAC:
15236 case NEON_SCALARMUL:
15237 case NEON_SCALARMULL:
15238 case NEON_SCALARMULH:
15239 case NEON_SCALARMAC:
15240 case NEON_SELECT:
15241 case NEON_VTBL:
15242 case NEON_VTBX:
15244 int k;
15245 tree return_type = void_type_node, args = void_list_node;
15247 /* Build a function type directly from the insn_data for this
15248 builtin. The build_function_type() function takes care of
15249 removing duplicates for us. */
15250 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15252 tree eltype;
15254 if (is_load && k == 1)
15256 /* Neon load patterns always have the memory operand
15257 (a SImode pointer) in the operand 1 position. We
15258 want a const pointer to the element type in that
15259 position. */
15260 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15262 switch (1 << j)
15264 case T_V8QI:
15265 case T_V16QI:
15266 eltype = const_intQI_pointer_node;
15267 break;
15269 case T_V4HI:
15270 case T_V8HI:
15271 eltype = const_intHI_pointer_node;
15272 break;
15274 case T_V2SI:
15275 case T_V4SI:
15276 eltype = const_intSI_pointer_node;
15277 break;
15279 case T_V2SF:
15280 case T_V4SF:
15281 eltype = const_float_pointer_node;
15282 break;
15284 case T_DI:
15285 case T_V2DI:
15286 eltype = const_intDI_pointer_node;
15287 break;
15289 default: gcc_unreachable ();
15292 else if (is_store && k == 0)
15294 /* Similarly, Neon store patterns use operand 0 as
15295 the memory location to store to (a SImode pointer).
15296 Use a pointer to the element type of the store in
15297 that position. */
15298 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15300 switch (1 << j)
15302 case T_V8QI:
15303 case T_V16QI:
15304 eltype = intQI_pointer_node;
15305 break;
15307 case T_V4HI:
15308 case T_V8HI:
15309 eltype = intHI_pointer_node;
15310 break;
15312 case T_V2SI:
15313 case T_V4SI:
15314 eltype = intSI_pointer_node;
15315 break;
15317 case T_V2SF:
15318 case T_V4SF:
15319 eltype = float_pointer_node;
15320 break;
15322 case T_DI:
15323 case T_V2DI:
15324 eltype = intDI_pointer_node;
15325 break;
15327 default: gcc_unreachable ();
15330 else
15332 switch (insn_data[icode].operand[k].mode)
15334 case VOIDmode: eltype = void_type_node; break;
15335 /* Scalars. */
15336 case QImode: eltype = neon_intQI_type_node; break;
15337 case HImode: eltype = neon_intHI_type_node; break;
15338 case SImode: eltype = neon_intSI_type_node; break;
15339 case SFmode: eltype = neon_float_type_node; break;
15340 case DImode: eltype = neon_intDI_type_node; break;
15341 case TImode: eltype = intTI_type_node; break;
15342 case EImode: eltype = intEI_type_node; break;
15343 case OImode: eltype = intOI_type_node; break;
15344 case CImode: eltype = intCI_type_node; break;
15345 case XImode: eltype = intXI_type_node; break;
15346 /* 64-bit vectors. */
15347 case V8QImode: eltype = V8QI_type_node; break;
15348 case V4HImode: eltype = V4HI_type_node; break;
15349 case V2SImode: eltype = V2SI_type_node; break;
15350 case V2SFmode: eltype = V2SF_type_node; break;
15351 /* 128-bit vectors. */
15352 case V16QImode: eltype = V16QI_type_node; break;
15353 case V8HImode: eltype = V8HI_type_node; break;
15354 case V4SImode: eltype = V4SI_type_node; break;
15355 case V4SFmode: eltype = V4SF_type_node; break;
15356 case V2DImode: eltype = V2DI_type_node; break;
15357 default: gcc_unreachable ();
15361 if (k == 0 && !is_store)
15362 return_type = eltype;
15363 else
15364 args = tree_cons (NULL_TREE, eltype, args);
15367 ftype = build_function_type (return_type, args);
15369 break;
15371 case NEON_RESULTPAIR:
15373 switch (insn_data[icode].operand[1].mode)
15375 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15376 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15377 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15378 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15379 case DImode: ftype = void_ftype_pdi_di_di; break;
15380 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15381 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15382 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15383 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15384 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15385 default: gcc_unreachable ();
15388 break;
15390 case NEON_REINTERP:
15392 /* We iterate over 5 doubleword types, then 5 quadword
15393 types. */
15394 int rhs = j % 5;
15395 switch (insn_data[icode].operand[0].mode)
15397 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15398 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15399 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15400 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15401 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15402 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15403 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15404 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15405 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15406 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15407 default: gcc_unreachable ();
15410 break;
15412 default:
15413 gcc_unreachable ();
15416 gcc_assert (ftype != NULL);
15418 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15420 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15421 NULL_TREE);
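/* For instance, with d->name == "vadd" and the T_V8QI variant enabled in
   d->bits, the loop above registers a builtin named "__builtin_neon_vaddv8qi";
   the V4SF variant of the same entry becomes "__builtin_neon_vaddv4sf".
   The arm_neon.h intrinsics are thin wrappers around these builtins, along
   the lines of (illustrative sketch only, not the exact header text):

     __extension__ static __inline int8x8_t
     vadd_s8 (int8x8_t __a, int8x8_t __b)
     {
       return (int8x8_t) __builtin_neon_vaddv8qi (__a, __b, 1);
     }

   where the trailing constant is the "magic word" consumed by a
   NEON_ARG_CONSTANT operand in the expanders further down this file. */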
15426 static void
15427 arm_init_builtins (void)
15429 arm_init_tls_builtins ();
15431 if (TARGET_REALLY_IWMMXT)
15432 arm_init_iwmmxt_builtins ();
15434 if (TARGET_NEON)
15435 arm_init_neon_builtins ();
15438 /* Errors in the source file can cause expand_expr to return const0_rtx
15439 where we expect a vector. To avoid crashing, use one of the vector
15440 clear instructions. */
15442 static rtx
15443 safe_vector_operand (rtx x, enum machine_mode mode)
15445 if (x != const0_rtx)
15446 return x;
15447 x = gen_reg_rtx (mode);
15449 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15450 : gen_rtx_SUBREG (DImode, x, 0)));
15451 return x;
15454 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15456 static rtx
15457 arm_expand_binop_builtin (enum insn_code icode,
15458 tree exp, rtx target)
15460 rtx pat;
15461 tree arg0 = CALL_EXPR_ARG (exp, 0);
15462 tree arg1 = CALL_EXPR_ARG (exp, 1);
15463 rtx op0 = expand_normal (arg0);
15464 rtx op1 = expand_normal (arg1);
15465 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15466 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15467 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15469 if (VECTOR_MODE_P (mode0))
15470 op0 = safe_vector_operand (op0, mode0);
15471 if (VECTOR_MODE_P (mode1))
15472 op1 = safe_vector_operand (op1, mode1);
15474 if (! target
15475 || GET_MODE (target) != tmode
15476 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15477 target = gen_reg_rtx (tmode);
15479 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15481 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15482 op0 = copy_to_mode_reg (mode0, op0);
15483 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15484 op1 = copy_to_mode_reg (mode1, op1);
15486 pat = GEN_FCN (icode) (target, op0, op1);
15487 if (! pat)
15488 return 0;
15489 emit_insn (pat);
15490 return target;
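/* For instance, the ARM_BUILTIN_WSADB builtin (the iWMMXt wsadb instruction)
   reaches this routine via arm_expand_builtin below as

     arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);

   the two call arguments are expanded, forced into registers of the modes
   the insn pattern expects, and the single wsadb insn is emitted. */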
15493 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15495 static rtx
15496 arm_expand_unop_builtin (enum insn_code icode,
15497 tree exp, rtx target, int do_load)
15499 rtx pat;
15500 tree arg0 = CALL_EXPR_ARG (exp, 0);
15501 rtx op0 = expand_normal (arg0);
15502 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15503 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15505 if (! target
15506 || GET_MODE (target) != tmode
15507 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15508 target = gen_reg_rtx (tmode);
15509 if (do_load)
15510 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15511 else
15513 if (VECTOR_MODE_P (mode0))
15514 op0 = safe_vector_operand (op0, mode0);
15516 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15517 op0 = copy_to_mode_reg (mode0, op0);
15520 pat = GEN_FCN (icode) (target, op0);
15521 if (! pat)
15522 return 0;
15523 emit_insn (pat);
15524 return target;
15527 static int
15528 neon_builtin_compare (const void *a, const void *b)
15530 const neon_builtin_datum *key = a;
15531 const neon_builtin_datum *memb = b;
15532 unsigned int soughtcode = key->base_fcode;
15534 if (soughtcode >= memb->base_fcode
15535 && soughtcode < memb->base_fcode + memb->num_vars)
15536 return 0;
15537 else if (soughtcode < memb->base_fcode)
15538 return -1;
15539 else
15540 return 1;
15543 static enum insn_code
15544 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15546 neon_builtin_datum key, *found;
15547 int idx;
15549 key.base_fcode = fcode;
15550 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15551 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15552 gcc_assert (found);
15553 idx = fcode - (int) found->base_fcode;
15554 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15556 if (itype)
15557 *itype = found->itype;
15559 return found->codes[idx];
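/* Worked example of the lookup above (hypothetical numbers): if an entry in
   neon_builtin_data has base_fcode == 1000 and num_vars == 4, then any fcode
   from 1000 to 1003 makes neon_builtin_compare return 0 for that entry, and
   idx == fcode - 1000 selects the per-mode element of its codes[] array.
   This relies on arm_init_neon_builtins having assigned fcodes in ascending,
   contiguous order, which keeps the table sorted for bsearch. */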
15562 typedef enum {
15563 NEON_ARG_COPY_TO_REG,
15564 NEON_ARG_CONSTANT,
15565 NEON_ARG_STOP
15566 } builtin_arg;
15568 #define NEON_MAX_BUILTIN_ARGS 5
15570 /* Expand a Neon builtin. */
15571 static rtx
15572 arm_expand_neon_args (rtx target, int icode, int have_retval,
15573 tree exp, ...)
15575 va_list ap;
15576 rtx pat;
15577 tree arg[NEON_MAX_BUILTIN_ARGS];
15578 rtx op[NEON_MAX_BUILTIN_ARGS];
15579 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15580 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15581 int argc = 0;
15583 if (have_retval
15584 && (!target
15585 || GET_MODE (target) != tmode
15586 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15587 target = gen_reg_rtx (tmode);
15589 va_start (ap, exp);
15591 for (;;)
15593 builtin_arg thisarg = va_arg (ap, int);
15595 if (thisarg == NEON_ARG_STOP)
15596 break;
15597 else
15599 arg[argc] = CALL_EXPR_ARG (exp, argc);
15600 op[argc] = expand_normal (arg[argc]);
15601 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15603 switch (thisarg)
15605 case NEON_ARG_COPY_TO_REG:
15606 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15607 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15608 (op[argc], mode[argc]))
15609 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15610 break;
15612 case NEON_ARG_CONSTANT:
15613 /* FIXME: This error message is somewhat unhelpful. */
15614 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15615 (op[argc], mode[argc]))
15616 error ("argument must be a constant");
15617 break;
15619 case NEON_ARG_STOP:
15620 gcc_unreachable ();
15623 argc++;
15627 va_end (ap);
15629 if (have_retval)
15630 switch (argc)
15632 case 1:
15633 pat = GEN_FCN (icode) (target, op[0]);
15634 break;
15636 case 2:
15637 pat = GEN_FCN (icode) (target, op[0], op[1]);
15638 break;
15640 case 3:
15641 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15642 break;
15644 case 4:
15645 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15646 break;
15648 case 5:
15649 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15650 break;
15652 default:
15653 gcc_unreachable ();
15655 else
15656 switch (argc)
15658 case 1:
15659 pat = GEN_FCN (icode) (op[0]);
15660 break;
15662 case 2:
15663 pat = GEN_FCN (icode) (op[0], op[1]);
15664 break;
15666 case 3:
15667 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15668 break;
15670 case 4:
15671 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15672 break;
15674 case 5:
15675 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15676 break;
15678 default:
15679 gcc_unreachable ();
15682 if (!pat)
15683 return 0;
15685 emit_insn (pat);
15687 return target;
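/* A typical call, taken from the dispatch below: a NEON_BINOP builtin is
   expanded with

     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_CONSTANT, NEON_ARG_STOP);

   i.e. two operands copied into registers, one constant "magic word"
   operand, and the NEON_ARG_STOP terminator; HAVE_RETVAL == 1 says the
   pattern also writes a result into TARGET. */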
15690 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15691 constants defined per-instruction or per instruction-variant. Instead, the
15692 required info is looked up in the table neon_builtin_data. */
15693 static rtx
15694 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15696 neon_itype itype;
15697 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15699 switch (itype)
15701 case NEON_UNOP:
15702 case NEON_CONVERT:
15703 case NEON_DUPLANE:
15704 return arm_expand_neon_args (target, icode, 1, exp,
15705 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15707 case NEON_BINOP:
15708 case NEON_SETLANE:
15709 case NEON_SCALARMUL:
15710 case NEON_SCALARMULL:
15711 case NEON_SCALARMULH:
15712 case NEON_SHIFTINSERT:
15713 case NEON_LOGICBINOP:
15714 return arm_expand_neon_args (target, icode, 1, exp,
15715 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15716 NEON_ARG_STOP);
15718 case NEON_TERNOP:
15719 return arm_expand_neon_args (target, icode, 1, exp,
15720 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15721 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15723 case NEON_GETLANE:
15724 case NEON_FIXCONV:
15725 case NEON_SHIFTIMM:
15726 return arm_expand_neon_args (target, icode, 1, exp,
15727 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15728 NEON_ARG_STOP);
15730 case NEON_CREATE:
15731 return arm_expand_neon_args (target, icode, 1, exp,
15732 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15734 case NEON_DUP:
15735 case NEON_SPLIT:
15736 case NEON_REINTERP:
15737 return arm_expand_neon_args (target, icode, 1, exp,
15738 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15740 case NEON_COMBINE:
15741 case NEON_VTBL:
15742 return arm_expand_neon_args (target, icode, 1, exp,
15743 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15745 case NEON_RESULTPAIR:
15746 return arm_expand_neon_args (target, icode, 0, exp,
15747 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15748 NEON_ARG_STOP);
15750 case NEON_LANEMUL:
15751 case NEON_LANEMULL:
15752 case NEON_LANEMULH:
15753 return arm_expand_neon_args (target, icode, 1, exp,
15754 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15755 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15757 case NEON_LANEMAC:
15758 return arm_expand_neon_args (target, icode, 1, exp,
15759 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15760 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15762 case NEON_SHIFTACC:
15763 return arm_expand_neon_args (target, icode, 1, exp,
15764 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15765 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15767 case NEON_SCALARMAC:
15768 return arm_expand_neon_args (target, icode, 1, exp,
15769 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15770 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15772 case NEON_SELECT:
15773 case NEON_VTBX:
15774 return arm_expand_neon_args (target, icode, 1, exp,
15775 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15776 NEON_ARG_STOP);
15778 case NEON_LOAD1:
15779 case NEON_LOADSTRUCT:
15780 return arm_expand_neon_args (target, icode, 1, exp,
15781 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15783 case NEON_LOAD1LANE:
15784 case NEON_LOADSTRUCTLANE:
15785 return arm_expand_neon_args (target, icode, 1, exp,
15786 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15787 NEON_ARG_STOP);
15789 case NEON_STORE1:
15790 case NEON_STORESTRUCT:
15791 return arm_expand_neon_args (target, icode, 0, exp,
15792 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15794 case NEON_STORE1LANE:
15795 case NEON_STORESTRUCTLANE:
15796 return arm_expand_neon_args (target, icode, 0, exp,
15797 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15798 NEON_ARG_STOP);
15801 gcc_unreachable ();
15804 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15805 void
15806 neon_reinterpret (rtx dest, rtx src)
15808 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15811 /* Emit code to place a Neon pair result in memory locations (with equal
15812 registers). */
15813 void
15814 neon_emit_pair_result_insn (enum machine_mode mode,
15815 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15816 rtx op1, rtx op2)
15818 rtx mem = gen_rtx_MEM (mode, destaddr);
15819 rtx tmp1 = gen_reg_rtx (mode);
15820 rtx tmp2 = gen_reg_rtx (mode);
15822 emit_insn (intfn (tmp1, op1, tmp2, op2));
15824 emit_move_insn (mem, tmp1);
15825 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15826 emit_move_insn (mem, tmp2);
15829 /* Set up operands for a register copy from src to dest, taking care not to
15830 clobber registers in the process.
15831 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15832 be called with a large N, so that should be OK. */
15834 void
15835 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15837 unsigned int copied = 0, opctr = 0;
15838 unsigned int done = (1 << count) - 1;
15839 unsigned int i, j;
15841 while (copied != done)
15843 for (i = 0; i < count; i++)
15845 int good = 1;
15847 for (j = 0; good && j < count; j++)
15848 if (i != j && (copied & (1 << j)) == 0
15849 && reg_overlap_mentioned_p (src[j], dest[i]))
15850 good = 0;
15852 if (good)
15854 operands[opctr++] = dest[i];
15855 operands[opctr++] = src[i];
15856 copied |= 1 << i;
15861 gcc_assert (opctr == count * 2);
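/* Worked example: with count == 2, dest == {d1, d2} and src == {d0, d1},
   copying in index order would clobber d1 (still needed as src[1]) before it
   is read.  The loop above instead emits the safe order, filling OPERANDS
   with { d2, d1, d1, d0 }, i.e. d2 := d1 first and d1 := d0 second. */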
15864 /* Expand an expression EXP that calls a built-in function,
15865 with result going to TARGET if that's convenient
15866 (and in mode MODE if that's convenient).
15867 SUBTARGET may be used as the target for computing one of EXP's operands.
15868 IGNORE is nonzero if the value is to be ignored. */
15870 static rtx
15871 arm_expand_builtin (tree exp,
15872 rtx target,
15873 rtx subtarget ATTRIBUTE_UNUSED,
15874 enum machine_mode mode ATTRIBUTE_UNUSED,
15875 int ignore ATTRIBUTE_UNUSED)
15877 const struct builtin_description * d;
15878 enum insn_code icode;
15879 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15880 tree arg0;
15881 tree arg1;
15882 tree arg2;
15883 rtx op0;
15884 rtx op1;
15885 rtx op2;
15886 rtx pat;
15887 int fcode = DECL_FUNCTION_CODE (fndecl);
15888 size_t i;
15889 enum machine_mode tmode;
15890 enum machine_mode mode0;
15891 enum machine_mode mode1;
15892 enum machine_mode mode2;
15894 if (fcode >= ARM_BUILTIN_NEON_BASE)
15895 return arm_expand_neon_builtin (fcode, exp, target);
15897 switch (fcode)
15899 case ARM_BUILTIN_TEXTRMSB:
15900 case ARM_BUILTIN_TEXTRMUB:
15901 case ARM_BUILTIN_TEXTRMSH:
15902 case ARM_BUILTIN_TEXTRMUH:
15903 case ARM_BUILTIN_TEXTRMSW:
15904 case ARM_BUILTIN_TEXTRMUW:
15905 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15906 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15907 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15908 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15909 : CODE_FOR_iwmmxt_textrmw);
15911 arg0 = CALL_EXPR_ARG (exp, 0);
15912 arg1 = CALL_EXPR_ARG (exp, 1);
15913 op0 = expand_normal (arg0);
15914 op1 = expand_normal (arg1);
15915 tmode = insn_data[icode].operand[0].mode;
15916 mode0 = insn_data[icode].operand[1].mode;
15917 mode1 = insn_data[icode].operand[2].mode;
15919 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15920 op0 = copy_to_mode_reg (mode0, op0);
15921 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15923 /* @@@ better error message */
15924 error ("selector must be an immediate");
15925 return gen_reg_rtx (tmode);
15927 if (target == 0
15928 || GET_MODE (target) != tmode
15929 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15930 target = gen_reg_rtx (tmode);
15931 pat = GEN_FCN (icode) (target, op0, op1);
15932 if (! pat)
15933 return 0;
15934 emit_insn (pat);
15935 return target;
15937 case ARM_BUILTIN_TINSRB:
15938 case ARM_BUILTIN_TINSRH:
15939 case ARM_BUILTIN_TINSRW:
15940 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15941 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15942 : CODE_FOR_iwmmxt_tinsrw);
15943 arg0 = CALL_EXPR_ARG (exp, 0);
15944 arg1 = CALL_EXPR_ARG (exp, 1);
15945 arg2 = CALL_EXPR_ARG (exp, 2);
15946 op0 = expand_normal (arg0);
15947 op1 = expand_normal (arg1);
15948 op2 = expand_normal (arg2);
15949 tmode = insn_data[icode].operand[0].mode;
15950 mode0 = insn_data[icode].operand[1].mode;
15951 mode1 = insn_data[icode].operand[2].mode;
15952 mode2 = insn_data[icode].operand[3].mode;
15954 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15955 op0 = copy_to_mode_reg (mode0, op0);
15956 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15957 op1 = copy_to_mode_reg (mode1, op1);
15958 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15960 /* @@@ better error message */
15961 error ("selector must be an immediate");
15962 return const0_rtx;
15964 if (target == 0
15965 || GET_MODE (target) != tmode
15966 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15967 target = gen_reg_rtx (tmode);
15968 pat = GEN_FCN (icode) (target, op0, op1, op2);
15969 if (! pat)
15970 return 0;
15971 emit_insn (pat);
15972 return target;
15974 case ARM_BUILTIN_SETWCX:
15975 arg0 = CALL_EXPR_ARG (exp, 0);
15976 arg1 = CALL_EXPR_ARG (exp, 1);
15977 op0 = force_reg (SImode, expand_normal (arg0));
15978 op1 = expand_normal (arg1);
15979 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15980 return 0;
15982 case ARM_BUILTIN_GETWCX:
15983 arg0 = CALL_EXPR_ARG (exp, 0);
15984 op0 = expand_normal (arg0);
15985 target = gen_reg_rtx (SImode);
15986 emit_insn (gen_iwmmxt_tmrc (target, op0));
15987 return target;
15989 case ARM_BUILTIN_WSHUFH:
15990 icode = CODE_FOR_iwmmxt_wshufh;
15991 arg0 = CALL_EXPR_ARG (exp, 0);
15992 arg1 = CALL_EXPR_ARG (exp, 1);
15993 op0 = expand_normal (arg0);
15994 op1 = expand_normal (arg1);
15995 tmode = insn_data[icode].operand[0].mode;
15996 mode1 = insn_data[icode].operand[1].mode;
15997 mode2 = insn_data[icode].operand[2].mode;
15999 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
16000 op0 = copy_to_mode_reg (mode1, op0);
16001 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
16003 /* @@@ better error message */
16004 error ("mask must be an immediate");
16005 return const0_rtx;
16007 if (target == 0
16008 || GET_MODE (target) != tmode
16009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16010 target = gen_reg_rtx (tmode);
16011 pat = GEN_FCN (icode) (target, op0, op1);
16012 if (! pat)
16013 return 0;
16014 emit_insn (pat);
16015 return target;
16017 case ARM_BUILTIN_WSADB:
16018 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
16019 case ARM_BUILTIN_WSADH:
16020 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
16021 case ARM_BUILTIN_WSADBZ:
16022 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
16023 case ARM_BUILTIN_WSADHZ:
16024 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
16026 /* Several three-argument builtins. */
16027 case ARM_BUILTIN_WMACS:
16028 case ARM_BUILTIN_WMACU:
16029 case ARM_BUILTIN_WALIGN:
16030 case ARM_BUILTIN_TMIA:
16031 case ARM_BUILTIN_TMIAPH:
16032 case ARM_BUILTIN_TMIATT:
16033 case ARM_BUILTIN_TMIATB:
16034 case ARM_BUILTIN_TMIABT:
16035 case ARM_BUILTIN_TMIABB:
16036 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
16037 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
16038 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
16039 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
16040 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
16041 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
16042 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
16043 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
16044 : CODE_FOR_iwmmxt_walign);
16045 arg0 = CALL_EXPR_ARG (exp, 0);
16046 arg1 = CALL_EXPR_ARG (exp, 1);
16047 arg2 = CALL_EXPR_ARG (exp, 2);
16048 op0 = expand_normal (arg0);
16049 op1 = expand_normal (arg1);
16050 op2 = expand_normal (arg2);
16051 tmode = insn_data[icode].operand[0].mode;
16052 mode0 = insn_data[icode].operand[1].mode;
16053 mode1 = insn_data[icode].operand[2].mode;
16054 mode2 = insn_data[icode].operand[3].mode;
16056 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16057 op0 = copy_to_mode_reg (mode0, op0);
16058 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
16059 op1 = copy_to_mode_reg (mode1, op1);
16060 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
16061 op2 = copy_to_mode_reg (mode2, op2);
16062 if (target == 0
16063 || GET_MODE (target) != tmode
16064 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16065 target = gen_reg_rtx (tmode);
16066 pat = GEN_FCN (icode) (target, op0, op1, op2);
16067 if (! pat)
16068 return 0;
16069 emit_insn (pat);
16070 return target;
16072 case ARM_BUILTIN_WZERO:
16073 target = gen_reg_rtx (DImode);
16074 emit_insn (gen_iwmmxt_clrdi (target));
16075 return target;
16077 case ARM_BUILTIN_THREAD_POINTER:
16078 return arm_load_tp (target);
16080 default:
16081 break;
16084 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16085 if (d->code == (const enum arm_builtins) fcode)
16086 return arm_expand_binop_builtin (d->icode, exp, target);
16088 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16089 if (d->code == (const enum arm_builtins) fcode)
16090 return arm_expand_unop_builtin (d->icode, exp, target, 0);
16092 /* @@@ Should really do something sensible here. */
16093 return NULL_RTX;
16096 /* Return the number (counting from 0) of
16097 the least significant set bit in MASK. */
16099 inline static int
16100 number_of_first_bit_set (unsigned mask)
16102 int bit;
16104 for (bit = 0;
16105 (mask & (1 << bit)) == 0;
16106 ++bit)
16107 continue;
16109 return bit;
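/* For example, number_of_first_bit_set (0x28) == 3, since bit 3 is the
   lowest bit set in 0b101000.  MASK must be nonzero; the loop above has no
   other termination condition. */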
16112 /* Emit code to push or pop registers to or from the stack. F is the
16113 assembly file. MASK is the registers to push or pop. PUSH is
16114 nonzero if we should push, and zero if we should pop. For debugging
16115 output, if pushing, adjust CFA_OFFSET by the amount of space added
16116 to the stack. REAL_REGS should have the same number of bits set as
16117 MASK, and will be used instead (in the same order) to describe which
16118 registers were saved - this is used to mark the save slots when we
16119 push high registers after moving them to low registers. */
16120 static void
16121 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
16122 unsigned long real_regs)
16124 int regno;
16125 int lo_mask = mask & 0xFF;
16126 int pushed_words = 0;
16128 gcc_assert (mask);
16130 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
16132 /* Special case. Do not generate a POP PC statement here; do it in
16133 thumb_exit (). */
16134 thumb_exit (f, -1);
16135 return;
16138 if (ARM_EABI_UNWIND_TABLES && push)
16140 fprintf (f, "\t.save\t{");
16141 for (regno = 0; regno < 15; regno++)
16143 if (real_regs & (1 << regno))
16145 if (real_regs & ((1 << regno) -1))
16146 fprintf (f, ", ");
16147 asm_fprintf (f, "%r", regno);
16150 fprintf (f, "}\n");
16153 fprintf (f, "\t%s\t{", push ? "push" : "pop");
16155 /* Look at the low registers first. */
16156 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
16158 if (lo_mask & 1)
16160 asm_fprintf (f, "%r", regno);
16162 if ((lo_mask & ~1) != 0)
16163 fprintf (f, ", ");
16165 pushed_words++;
16169 if (push && (mask & (1 << LR_REGNUM)))
16171 /* Catch pushing the LR. */
16172 if (mask & 0xFF)
16173 fprintf (f, ", ");
16175 asm_fprintf (f, "%r", LR_REGNUM);
16177 pushed_words++;
16179 else if (!push && (mask & (1 << PC_REGNUM)))
16181 /* Catch popping the PC. */
16182 if (TARGET_INTERWORK || TARGET_BACKTRACE
16183 || crtl->calls_eh_return)
16185 /* The PC is never popped directly; instead
16186 it is popped into r3 and then BX is used. */
16187 fprintf (f, "}\n");
16189 thumb_exit (f, -1);
16191 return;
16193 else
16195 if (mask & 0xFF)
16196 fprintf (f, ", ");
16198 asm_fprintf (f, "%r", PC_REGNUM);
16202 fprintf (f, "}\n");
16204 if (push && pushed_words && dwarf2out_do_frame ())
16206 char *l = dwarf2out_cfi_label ();
16207 int pushed_mask = real_regs;
16209 *cfa_offset += pushed_words * 4;
16210 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16212 pushed_words = 0;
16213 pushed_mask = real_regs;
16214 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16216 if (pushed_mask & 1)
16217 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
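/* For instance, thumb_pushpop (f, 0x40f0, 1, &cfa_offset, 0x40f0) emits
   roughly

     .save {r4, r5, r6, r7, lr}   @ only with EABI unwind tables
     push  {r4, r5, r6, r7, lr}

   and, when dwarf2 frame info is wanted, bumps *CFA_OFFSET by 20 bytes and
   records one save slot per pushed register. */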
16222 /* Generate code to return from a thumb function.
16223 If 'reg_containing_return_addr' is -1, then the return address is
16224 actually on the stack, at the stack pointer. */
16225 static void
16226 thumb_exit (FILE *f, int reg_containing_return_addr)
16228 unsigned regs_available_for_popping;
16229 unsigned regs_to_pop;
16230 int pops_needed;
16231 unsigned available;
16232 unsigned required;
16233 int mode;
16234 int size;
16235 int restore_a4 = FALSE;
16237 /* Compute the registers we need to pop. */
16238 regs_to_pop = 0;
16239 pops_needed = 0;
16241 if (reg_containing_return_addr == -1)
16243 regs_to_pop |= 1 << LR_REGNUM;
16244 ++pops_needed;
16247 if (TARGET_BACKTRACE)
16249 /* Restore the (ARM) frame pointer and stack pointer. */
16250 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16251 pops_needed += 2;
16254 /* If there is nothing to pop then just emit the BX instruction and
16255 return. */
16256 if (pops_needed == 0)
16258 if (crtl->calls_eh_return)
16259 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16261 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16262 return;
16264 /* Otherwise if we are not supporting interworking and we have not created
16265 a backtrace structure and the function was not entered in ARM mode then
16266 just pop the return address straight into the PC. */
16267 else if (!TARGET_INTERWORK
16268 && !TARGET_BACKTRACE
16269 && !is_called_in_ARM_mode (current_function_decl)
16270 && !crtl->calls_eh_return)
16272 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16273 return;
16276 /* Find out how many of the (return) argument registers we can corrupt. */
16277 regs_available_for_popping = 0;
16279 /* If returning via __builtin_eh_return, the bottom three registers
16280 all contain information needed for the return. */
16281 if (crtl->calls_eh_return)
16282 size = 12;
16283 else
16285 /* We can deduce the registers used from the function's
16286 return value. This is more reliable than examining
16287 df_regs_ever_live_p () because that will be set if the register is
16288 ever used in the function, not just if the register is used
16289 to hold a return value. */
16291 if (crtl->return_rtx != 0)
16292 mode = GET_MODE (crtl->return_rtx);
16293 else
16294 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16296 size = GET_MODE_SIZE (mode);
16298 if (size == 0)
16300 /* In a void function we can use any argument register.
16301 In a function that returns a structure on the stack
16302 we can use the second and third argument registers. */
16303 if (mode == VOIDmode)
16304 regs_available_for_popping =
16305 (1 << ARG_REGISTER (1))
16306 | (1 << ARG_REGISTER (2))
16307 | (1 << ARG_REGISTER (3));
16308 else
16309 regs_available_for_popping =
16310 (1 << ARG_REGISTER (2))
16311 | (1 << ARG_REGISTER (3));
16313 else if (size <= 4)
16314 regs_available_for_popping =
16315 (1 << ARG_REGISTER (2))
16316 | (1 << ARG_REGISTER (3));
16317 else if (size <= 8)
16318 regs_available_for_popping =
16319 (1 << ARG_REGISTER (3));
16322 /* Match registers to be popped with registers into which we pop them. */
16323 for (available = regs_available_for_popping,
16324 required = regs_to_pop;
16325 required != 0 && available != 0;
16326 available &= ~(available & - available),
16327 required &= ~(required & - required))
16328 -- pops_needed;
16330 /* If we have any popping registers left over, remove them. */
16331 if (available > 0)
16332 regs_available_for_popping &= ~available;
16334 /* Otherwise if we need another popping register we can use
16335 the fourth argument register. */
16336 else if (pops_needed)
16338 /* If we have not found any free argument registers and
16339 reg a4 contains the return address, we must move it. */
16340 if (regs_available_for_popping == 0
16341 && reg_containing_return_addr == LAST_ARG_REGNUM)
16343 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16344 reg_containing_return_addr = LR_REGNUM;
16346 else if (size > 12)
16348 /* Register a4 is being used to hold part of the return value,
16349 but we have dire need of a free, low register. */
16350 restore_a4 = TRUE;
16352 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16355 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16357 /* The fourth argument register is available. */
16358 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16360 --pops_needed;
16364 /* Pop as many registers as we can. */
16365 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16366 regs_available_for_popping);
16368 /* Process the registers we popped. */
16369 if (reg_containing_return_addr == -1)
16371 /* The return address was popped into the lowest numbered register. */
16372 regs_to_pop &= ~(1 << LR_REGNUM);
16374 reg_containing_return_addr =
16375 number_of_first_bit_set (regs_available_for_popping);
16377 /* Remove this register from the mask of available registers, so that
16378 the return address will not be corrupted by further pops. */
16379 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16382 /* If we popped other registers then handle them here. */
16383 if (regs_available_for_popping)
16385 int frame_pointer;
16387 /* Work out which register currently contains the frame pointer. */
16388 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16390 /* Move it into the correct place. */
16391 asm_fprintf (f, "\tmov\t%r, %r\n",
16392 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16394 /* (Temporarily) remove it from the mask of popped registers. */
16395 regs_available_for_popping &= ~(1 << frame_pointer);
16396 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16398 if (regs_available_for_popping)
16400 int stack_pointer;
16402 /* We popped the stack pointer as well;
16403 find the register that contains it. */
16404 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16406 /* Move it into the stack register. */
16407 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16409 /* At this point we have popped all necessary registers, so
16410 do not worry about restoring regs_available_for_popping
16411 to its correct value:
16413 assert (pops_needed == 0)
16414 assert (regs_available_for_popping == (1 << frame_pointer))
16415 assert (regs_to_pop == (1 << STACK_POINTER)) */
16417 else
16419 /* Since we have just moved the popped value into the frame
16420 pointer, the popping register is available for reuse, and
16421 we know that we still have the stack pointer left to pop. */
16422 regs_available_for_popping |= (1 << frame_pointer);
16426 /* If we still have registers left on the stack, but we no longer have
16427 any registers into which we can pop them, then we must move the return
16428 address into the link register and make available the register that
16429 contained it. */
16430 if (regs_available_for_popping == 0 && pops_needed > 0)
16432 regs_available_for_popping |= 1 << reg_containing_return_addr;
16434 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16435 reg_containing_return_addr);
16437 reg_containing_return_addr = LR_REGNUM;
16440 /* If we have registers left on the stack then pop some more.
16441 We know that at most we will want to pop FP and SP. */
16442 if (pops_needed > 0)
16444 int popped_into;
16445 int move_to;
16447 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16448 regs_available_for_popping);
16450 /* We have popped either FP or SP.
16451 Move whichever one it is into the correct register. */
16452 popped_into = number_of_first_bit_set (regs_available_for_popping);
16453 move_to = number_of_first_bit_set (regs_to_pop);
16455 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16457 regs_to_pop &= ~(1 << move_to);
16459 --pops_needed;
16462 /* If we still have not popped everything then we must have only
16463 had one register available to us and we are now popping the SP. */
16464 if (pops_needed > 0)
16466 int popped_into;
16468 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16469 regs_available_for_popping);
16471 popped_into = number_of_first_bit_set (regs_available_for_popping);
16473 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16475 assert (regs_to_pop == (1 << STACK_POINTER))
16476 assert (pops_needed == 1)
16480 /* If necessary restore the a4 register. */
16481 if (restore_a4)
16483 if (reg_containing_return_addr != LR_REGNUM)
16485 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16486 reg_containing_return_addr = LR_REGNUM;
16489 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16492 if (crtl->calls_eh_return)
16493 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16495 /* Return to caller. */
16496 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
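/* In the simplest case (no interworking, no backtrace structure, not entered
   in ARM mode, no __builtin_eh_return, return address still on the stack)
   the early-exit path above emits just

     pop {pc}

   while the interworking paths end with a "bx" through whichever register
   finished up holding the return address. */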
16500 void
16501 thumb1_final_prescan_insn (rtx insn)
16503 if (flag_print_asm_name)
16504 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16505 INSN_ADDRESSES (INSN_UID (insn)));
16509 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16511 unsigned HOST_WIDE_INT mask = 0xff;
16512 int i;
16514 if (val == 0) /* XXX */
16515 return 0;
16517 for (i = 0; i < 25; i++)
16518 if ((val & (mask << i)) == val)
16519 return 1;
16521 return 0;
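/* For example, 0x0000ff00 and 0x03fc0000 are accepted (an 8-bit field
   shifted left by 8 and 18 bits respectively), while 0x00010001 is rejected
   because its set bits do not fit in any single 8-bit window. */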
16524 /* Returns nonzero if the current function contains,
16525 or might contain, a far jump. */
16526 static int
16527 thumb_far_jump_used_p (void)
16529 rtx insn;
16531 /* This test is only important for leaf functions. */
16532 /* assert (!leaf_function_p ()); */
16534 /* If we have already decided that far jumps may be used,
16535 do not bother checking again, and always return true even if
16536 it turns out that they are not being used. Once we have made
16537 the decision that far jumps are present (and that hence the link
16538 register will be pushed onto the stack) we cannot go back on it. */
16539 if (cfun->machine->far_jump_used)
16540 return 1;
16542 /* If this function is not being called from the prologue/epilogue
16543 generation code then it must be being called from the
16544 INITIAL_ELIMINATION_OFFSET macro. */
16545 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16547 /* In this case we know that we are being asked about the elimination
16548 of the arg pointer register. If that register is not being used,
16549 then there are no arguments on the stack, and we do not have to
16550 worry that a far jump might force the prologue to push the link
16551 register, changing the stack offsets. In this case we can just
16552 return false, since the presence of far jumps in the function will
16553 not affect stack offsets.
16555 If the arg pointer is live (or if it was live, but has now been
16556 eliminated and so set to dead) then we do have to test to see if
16557 the function might contain a far jump. This test can lead to some
16558 false negatives, since before reload is completed, the length of
16559 branch instructions is not known, so gcc defaults to returning their
16560 longest length, which in turn sets the far jump attribute to true.
16562 A false negative will not result in bad code being generated, but it
16563 will result in a needless push and pop of the link register. We
16564 hope that this does not occur too often.
16566 If we need doubleword stack alignment this could affect the other
16567 elimination offsets so we can't risk getting it wrong. */
16568 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16569 cfun->machine->arg_pointer_live = 1;
16570 else if (!cfun->machine->arg_pointer_live)
16571 return 0;
16574 /* Check to see if the function contains a branch
16575 insn with the far jump attribute set. */
16576 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16578 if (GET_CODE (insn) == JUMP_INSN
16579 /* Ignore tablejump patterns. */
16580 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16581 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16582 && get_attr_far_jump (insn) == FAR_JUMP_YES
16585 /* Record the fact that we have decided that
16586 the function does use far jumps. */
16587 cfun->machine->far_jump_used = 1;
16588 return 1;
16592 return 0;
16595 /* Return nonzero if FUNC must be entered in ARM mode. */
16597 is_called_in_ARM_mode (tree func)
16599 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16601 /* Ignore the problem about functions whose address is taken. */
16602 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16603 return TRUE;
16605 #ifdef ARM_PE
16606 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16607 #else
16608 return FALSE;
16609 #endif
16612 /* The bits which aren't usefully expanded as rtl. */
16613 const char *
16614 thumb_unexpanded_epilogue (void)
16616 arm_stack_offsets *offsets;
16617 int regno;
16618 unsigned long live_regs_mask = 0;
16619 int high_regs_pushed = 0;
16620 int had_to_push_lr;
16621 int size;
16623 if (return_used_this_function)
16624 return "";
16626 if (IS_NAKED (arm_current_func_type ()))
16627 return "";
16629 offsets = arm_get_frame_offsets ();
16630 live_regs_mask = offsets->saved_regs_mask;
16631 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16633 /* We can deduce the registers used from the function's return value.
16634 This is more reliable than examining df_regs_ever_live_p () because that
16635 will be set if the register is ever used in the function, not just if
16636 the register is used to hold a return value. */
16637 size = arm_size_return_regs ();
16639 /* The prolog may have pushed some high registers to use as
16640 work registers. e.g. the testsuite file:
16641 gcc/testsuite/gcc.c-torture/execute/complex-2.c
16642 compiles to produce:
16643 push {r4, r5, r6, r7, lr}
16644 mov r7, r9
16645 mov r6, r8
16646 push {r6, r7}
16647 as part of the prolog. We have to undo that pushing here. */
16649 if (high_regs_pushed)
16651 unsigned long mask = live_regs_mask & 0xff;
16652 int next_hi_reg;
16654 /* The available low registers depend on the size of the value we are
16655 returning. */
16656 if (size <= 12)
16657 mask |= 1 << 3;
16658 if (size <= 8)
16659 mask |= 1 << 2;
16661 if (mask == 0)
16662 /* Oh dear! We have no low registers into which we can pop
16663 high registers! */
16664 internal_error
16665 ("no low registers available for popping high registers");
16667 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16668 if (live_regs_mask & (1 << next_hi_reg))
16669 break;
16671 while (high_regs_pushed)
16673 /* Find lo register(s) into which the high register(s) can
16674 be popped. */
16675 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16677 if (mask & (1 << regno))
16678 high_regs_pushed--;
16679 if (high_regs_pushed == 0)
16680 break;
16683 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16685 /* Pop the values into the low register(s). */
16686 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16688 /* Move the value(s) into the high registers. */
16689 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16691 if (mask & (1 << regno))
16693 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16694 regno);
16696 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16697 if (live_regs_mask & (1 << next_hi_reg))
16698 break;
16702 live_regs_mask &= ~0x0f00;
16705 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16706 live_regs_mask &= 0xff;
16708 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
16710 /* Pop the return address into the PC. */
16711 if (had_to_push_lr)
16712 live_regs_mask |= 1 << PC_REGNUM;
16714 /* Either no argument registers were pushed or a backtrace
16715 structure was created which includes an adjusted stack
16716 pointer, so just pop everything. */
16717 if (live_regs_mask)
16718 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16719 live_regs_mask);
16721 /* We have either just popped the return address into the
16722 PC or it was kept in LR for the entire function. */
16723 if (!had_to_push_lr)
16724 thumb_exit (asm_out_file, LR_REGNUM);
16726 else
16728 /* Pop everything but the return address. */
16729 if (live_regs_mask)
16730 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16731 live_regs_mask);
16733 if (had_to_push_lr)
16735 if (size > 12)
16737 /* We have no free low regs, so save one. */
16738 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16739 LAST_ARG_REGNUM);
16742 /* Get the return address into a temporary register. */
16743 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16744 1 << LAST_ARG_REGNUM);
16746 if (size > 12)
16748 /* Move the return address to lr. */
16749 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16750 LAST_ARG_REGNUM);
16751 /* Restore the low register. */
16752 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16753 IP_REGNUM);
16754 regno = LR_REGNUM;
16756 else
16757 regno = LAST_ARG_REGNUM;
16759 else
16760 regno = LR_REGNUM;
16762 /* Remove the argument registers that were pushed onto the stack. */
16763 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16764 SP_REGNUM, SP_REGNUM,
16765 crtl->args.pretend_args_size);
16767 thumb_exit (asm_out_file, regno);
16770 return "";
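/* For the prologue sequence quoted above (push {r4, r5, r6, r7, lr};
   mov r7, r9; mov r6, r8; push {r6, r7}) this function emits, roughly,

     pop  {rN, rM}    @ two free low registers chosen from MASK
     mov  r8, rN
     mov  r9, rM
     pop  {r4, r5, r6, r7, pc}

   where rN and rM are placeholders: the saved high registers are popped into
   whatever low registers are free and moved back up before the final pop. */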
16773 /* Functions to save and restore machine-specific function data. */
16774 static struct machine_function *
16775 arm_init_machine_status (void)
16777 struct machine_function *machine;
16778 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16780 #if ARM_FT_UNKNOWN != 0
16781 machine->func_type = ARM_FT_UNKNOWN;
16782 #endif
16783 return machine;
16786 /* Return an RTX indicating where the return address to the
16787 calling function can be found. */
16789 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16791 if (count != 0)
16792 return NULL_RTX;
16794 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16797 /* Do anything needed before RTL is emitted for each function. */
16798 void
16799 arm_init_expanders (void)
16801 /* Arrange to initialize and mark the machine per-function status. */
16802 init_machine_status = arm_init_machine_status;
16804 /* This is to stop the combine pass optimizing away the alignment
16805 adjustment of va_arg. */
16806 /* ??? It is claimed that this should not be necessary. */
16807 if (cfun)
16808 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16812 /* Like arm_compute_initial_elimination_offset. Simpler because there
16813 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16814 to point at the base of the local variables after static stack
16815 space for a function has been allocated. */
16817 HOST_WIDE_INT
16818 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16820 arm_stack_offsets *offsets;
16822 offsets = arm_get_frame_offsets ();
16824 switch (from)
16826 case ARG_POINTER_REGNUM:
16827 switch (to)
16829 case STACK_POINTER_REGNUM:
16830 return offsets->outgoing_args - offsets->saved_args;
16832 case FRAME_POINTER_REGNUM:
16833 return offsets->soft_frame - offsets->saved_args;
16835 case ARM_HARD_FRAME_POINTER_REGNUM:
16836 return offsets->saved_regs - offsets->saved_args;
16838 case THUMB_HARD_FRAME_POINTER_REGNUM:
16839 return offsets->locals_base - offsets->saved_args;
16841 default:
16842 gcc_unreachable ();
16844 break;
16846 case FRAME_POINTER_REGNUM:
16847 switch (to)
16849 case STACK_POINTER_REGNUM:
16850 return offsets->outgoing_args - offsets->soft_frame;
16852 case ARM_HARD_FRAME_POINTER_REGNUM:
16853 return offsets->saved_regs - offsets->soft_frame;
16855 case THUMB_HARD_FRAME_POINTER_REGNUM:
16856 return offsets->locals_base - offsets->soft_frame;
16858 default:
16859 gcc_unreachable ();
16861 break;
16863 default:
16864 gcc_unreachable ();
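/* For example, with hypothetical frame offsets saved_args == 0 and
   outgoing_args == 24, eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM returns 24: the incoming-argument base sits 24 bytes
   above the final stack pointer of this function. */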
16868 /* Generate the rest of a function's prologue. */
16869 void
16870 thumb1_expand_prologue (void)
16872 rtx insn, dwarf;
16874 HOST_WIDE_INT amount;
16875 arm_stack_offsets *offsets;
16876 unsigned long func_type;
16877 int regno;
16878 unsigned long live_regs_mask;
16880 func_type = arm_current_func_type ();
16882 /* Naked functions don't have prologues. */
16883 if (IS_NAKED (func_type))
16884 return;
16886 if (IS_INTERRUPT (func_type))
16888 error ("interrupt Service Routines cannot be coded in Thumb mode");
16889 return;
16892 offsets = arm_get_frame_offsets ();
16893 live_regs_mask = offsets->saved_regs_mask;
16894 /* Load the pic register before setting the frame pointer,
16895 so we can use r7 as a temporary work register. */
16896 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16897 arm_load_pic_register (live_regs_mask);
16899 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16900 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16901 stack_pointer_rtx);
16903 amount = offsets->outgoing_args - offsets->saved_regs;
16904 if (amount)
16906 if (amount < 512)
16908 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16909 GEN_INT (- amount)));
16910 RTX_FRAME_RELATED_P (insn) = 1;
16912 else
16914 rtx reg;
16916 /* The stack decrement is too big for an immediate value in a single
16917 insn. In theory we could issue multiple subtracts, but after
16918 three of them it becomes more space efficient to place the full
16919 value in the constant pool and load it into a register. (Also the
16920 ARM debugger really likes to see only one stack decrement per
16921 function). So instead we look for a scratch register into which
16922 we can load the decrement, and then we subtract this from the
16923 stack pointer. Unfortunately on the thumb the only available
16924 scratch registers are the argument registers, and we cannot use
16925 these as they may hold arguments to the function. Instead we
16926 attempt to locate a call preserved register which is used by this
16927 function. If we can find one, then we know that it will have
16928 been pushed at the start of the prologue and so we can corrupt
16929 it now. */
16930 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16931 if (live_regs_mask & (1 << regno)
16932 && !(frame_pointer_needed
16933 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16934 break;
16936 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16938 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16940 /* Choose an arbitrary, non-argument low register. */
16941 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16943 /* Save it by copying it into a high, scratch register. */
16944 emit_insn (gen_movsi (spare, reg));
16945 /* Add a USE to stop propagate_one_insn() from barfing. */
16946 emit_insn (gen_prologue_use (spare));
16948 /* Decrement the stack. */
16949 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16950 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16951 stack_pointer_rtx, reg));
16952 RTX_FRAME_RELATED_P (insn) = 1;
16953 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16954 plus_constant (stack_pointer_rtx,
16955 -amount));
16956 RTX_FRAME_RELATED_P (dwarf) = 1;
16957 REG_NOTES (insn)
16958 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16959 REG_NOTES (insn));
16961 /* Restore the low register's original value. */
16962 emit_insn (gen_movsi (reg, spare));
16964 /* Emit a USE of the restored scratch register, so that flow
16965 analysis will not consider the restore redundant. The
16966 register won't be used again in this function and isn't
16967 restored by the epilogue. */
16968 emit_insn (gen_prologue_use (reg));
16970 else
16972 reg = gen_rtx_REG (SImode, regno);
16974 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16976 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16977 stack_pointer_rtx, reg));
16978 RTX_FRAME_RELATED_P (insn) = 1;
16979 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16980 plus_constant (stack_pointer_rtx,
16981 -amount));
16982 RTX_FRAME_RELATED_P (dwarf) = 1;
16983 REG_NOTES (insn)
16984 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16985 REG_NOTES (insn));
16990 if (frame_pointer_needed)
16991 thumb_set_frame_pointer (offsets);
16993 /* If we are profiling, make sure no instructions are scheduled before
16994 the call to mcount. Similarly if the user has requested no
16995 scheduling in the prolog. Similarly if we want non-call exceptions
16996 using the EABI unwinder, to prevent faulting instructions from being
16997 swapped with a stack adjustment. */
16998 if (crtl->profile || !TARGET_SCHED_PROLOG
16999 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
17000 emit_insn (gen_blockage ());
17002 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
17003 if (live_regs_mask & 0xff)
17004 cfun->machine->lr_save_eliminated = 0;
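/* So for, say, a 1024-byte frame with r4 live, the large-decrement path
   above emits roughly

     ldr  r4, .Lc      @ .Lc is an illustrative constant-pool label: .word -1024
     add  sp, sp, r4

   rather than a chain of "sub sp, #N" instructions; r4 needs no extra save
   because it was already pushed at the start of the prologue. */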
17008 void
17009 thumb1_expand_epilogue (void)
17011 HOST_WIDE_INT amount;
17012 arm_stack_offsets *offsets;
17013 int regno;
17015 /* Naked functions don't have epilogues. */
17016 if (IS_NAKED (arm_current_func_type ()))
17017 return;
17019 offsets = arm_get_frame_offsets ();
17020 amount = offsets->outgoing_args - offsets->saved_regs;
17022 if (frame_pointer_needed)
17024 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
17025 amount = offsets->locals_base - offsets->saved_regs;
17028 gcc_assert (amount >= 0);
17029 if (amount)
17031 if (amount < 512)
17032 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
17033 GEN_INT (amount)));
17034 else
17036 /* r3 is always free in the epilogue. */
17037 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
17039 emit_insn (gen_movsi (reg, GEN_INT (amount)));
17040 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
17044 /* Emit a USE (stack_pointer_rtx), so that
17045 the stack adjustment will not be deleted. */
17046 emit_insn (gen_prologue_use (stack_pointer_rtx));
17048 if (crtl->profile || !TARGET_SCHED_PROLOG)
17049 emit_insn (gen_blockage ());
17051 /* Emit a clobber for each register that will be restored in the epilogue,
17052 so that flow2 will get register lifetimes correct. */
17053 for (regno = 0; regno < 13; regno++)
17054 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
17055 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
17057 if (! df_regs_ever_live_p (LR_REGNUM))
17058 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
17061 static void
17062 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
17064 arm_stack_offsets *offsets;
17065 unsigned long live_regs_mask = 0;
17066 unsigned long l_mask;
17067 unsigned high_regs_pushed = 0;
17068 int cfa_offset = 0;
17069 int regno;
17071 if (IS_NAKED (arm_current_func_type ()))
17072 return;
17074 if (is_called_in_ARM_mode (current_function_decl))
17076 const char * name;
17078 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
17079 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
17080 == SYMBOL_REF);
17081 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
17083 /* Generate code sequence to switch us into Thumb mode. */
17084 /* The .code 32 directive has already been emitted by
17085 ASM_DECLARE_FUNCTION_NAME. */
17086 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
17087 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
17089 /* Generate a label, so that the debugger will notice the
17090 change in instruction sets. This label is also used by
17091 the assembler to bypass the ARM code when this function
17092 is called from a Thumb encoded function elsewhere in the
17093 same file. Hence the definition of STUB_NAME here must
17094 agree with the definition in gas/config/tc-arm.c. */
17096 #define STUB_NAME ".real_start_of"
17098 fprintf (f, "\t.code\t16\n");
17099 #ifdef ARM_PE
17100 if (arm_dllexport_name_p (name))
17101 name = arm_strip_name_encoding (name);
17102 #endif
17103 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
17104 fprintf (f, "\t.thumb_func\n");
17105 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
17108 if (crtl->args.pretend_args_size)
17110 /* Output unwind directive for the stack adjustment. */
17111 if (ARM_EABI_UNWIND_TABLES)
17112 fprintf (f, "\t.pad #%d\n",
17113 crtl->args.pretend_args_size);
17115 if (cfun->machine->uses_anonymous_args)
17117 int num_pushes;
17119 fprintf (f, "\tpush\t{");
17121 num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
17123 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
17124 regno <= LAST_ARG_REGNUM;
17125 regno++)
17126 asm_fprintf (f, "%r%s", regno,
17127 regno == LAST_ARG_REGNUM ? "" : ", ");
17129 fprintf (f, "}\n");
17131 else
17132 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
17133 SP_REGNUM, SP_REGNUM,
17134 crtl->args.pretend_args_size);
17136 /* We don't need to record the stores for unwinding (would it
17137 help the debugger any if we did?), but record the change in
17138 the stack pointer. */
17139 if (dwarf2out_do_frame ())
17141 char *l = dwarf2out_cfi_label ();
17143 cfa_offset = cfa_offset + crtl->args.pretend_args_size;
17144 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17148 /* Get the registers we are going to push. */
17149 offsets = arm_get_frame_offsets ();
17150 live_regs_mask = offsets->saved_regs_mask;
17151 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
17152 l_mask = live_regs_mask & 0x40ff;
17153 /* Then count how many other high registers will need to be pushed. */
17154 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
17156 if (TARGET_BACKTRACE)
17158 unsigned offset;
17159 unsigned work_register;
17161 /* We have been asked to create a stack backtrace structure.
17162 The code looks like this:
17164 0 .align 2
17165 0 func:
17166 0 sub SP, #16 Reserve space for 4 registers.
17167 2 push {R7} Push low registers.
17168 4 add R7, SP, #20 Get the stack pointer before the push.
17169 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17170 8 mov R7, PC Get hold of the start of this code plus 12.
17171 10 str R7, [SP, #16] Store it.
17172 12 mov R7, FP Get hold of the current frame pointer.
17173 14 str R7, [SP, #4] Store it.
17174 16 mov R7, LR Get hold of the current return address.
17175 18 str R7, [SP, #12] Store it.
17176 20 add R7, SP, #16 Point at the start of the backtrace structure.
17177 22 mov FP, R7 Put this value into the frame pointer. */
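   /* For example, with no low registers pushed and no pretend args
      (offset == 0), the record built below is laid out as:

	   [SP + 12]  saved PC   (start of this code + 12)   <- new FP
	   [SP +  8]  saved LR
	   [SP +  4]  caller's SP (value before the 16-byte reservation)
	   [SP +  0]  caller's FP                                        */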
17179 work_register = thumb_find_work_register (live_regs_mask);
17181 if (ARM_EABI_UNWIND_TABLES)
17182 asm_fprintf (f, "\t.pad #16\n");
17184 asm_fprintf
17185 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17186 SP_REGNUM, SP_REGNUM);
17188 if (dwarf2out_do_frame ())
17190 char *l = dwarf2out_cfi_label ();
17192 cfa_offset = cfa_offset + 16;
17193 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17196 if (l_mask)
17198 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17199 offset = bit_count (l_mask) * UNITS_PER_WORD;
17201 else
17202 offset = 0;
17204 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17205 offset + 16 + crtl->args.pretend_args_size);
17207 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17208 offset + 4);
17210 /* Make sure that the instruction fetching the PC is in the right place
17211 to calculate "start of backtrace creation code + 12". */
17212 if (l_mask)
17214 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17215 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17216 offset + 12);
17217 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17218 ARM_HARD_FRAME_POINTER_REGNUM);
17219 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17220 offset);
17222 else
17224 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17225 ARM_HARD_FRAME_POINTER_REGNUM);
17226 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17227 offset);
17228 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17229 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17230 offset + 12);
17233 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17234 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17235 offset + 8);
17236 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17237 offset + 12);
17238 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17239 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17241 /* Optimization: If we are not pushing any low registers but we are going
17242 to push some high registers then delay our first push. This will just
17243 be a push of LR and we can combine it with the push of the first high
17244 register. */
17245 else if ((l_mask & 0xff) != 0
17246 || (high_regs_pushed == 0 && l_mask))
17247 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17249 if (high_regs_pushed)
17251 unsigned pushable_regs;
17252 unsigned next_hi_reg;
17254 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17255 if (live_regs_mask & (1 << next_hi_reg))
17256 break;
17258 pushable_regs = l_mask & 0xff;
17260 if (pushable_regs == 0)
17261 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17263 while (high_regs_pushed > 0)
17265 unsigned long real_regs_mask = 0;
17267 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17269 if (pushable_regs & (1 << regno))
17271 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17273 high_regs_pushed --;
17274 real_regs_mask |= (1 << next_hi_reg);
17276 if (high_regs_pushed)
17278 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17279 next_hi_reg --)
17280 if (live_regs_mask & (1 << next_hi_reg))
17281 break;
17283 else
17285 pushable_regs &= ~((1 << regno) - 1);
17286 break;
17291 /* If we had to find a work register and we have not yet
17292 saved the LR then add it to the list of regs to push. */
17293 if (l_mask == (1 << LR_REGNUM))
17295 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17296 1, &cfa_offset,
17297 real_regs_mask | (1 << LR_REGNUM));
17298 l_mask = 0;
17300 else
17301 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17306 /* Handle the case of a double word load into a low register from
17307 a computed memory address. The computed address may involve a
17308 register which is overwritten by the load. */
17309 const char *
17310 thumb_load_double_from_address (rtx *operands)
17312 rtx addr;
17313 rtx base;
17314 rtx offset;
17315 rtx arg1;
17316 rtx arg2;
17318 gcc_assert (GET_CODE (operands[0]) == REG);
17319 gcc_assert (GET_CODE (operands[1]) == MEM);
17321 /* Get the memory address. */
17322 addr = XEXP (operands[1], 0);
17324 /* Work out how the memory address is computed. */
17325 switch (GET_CODE (addr))
17327 case REG:
17328 operands[2] = adjust_address (operands[1], SImode, 4);
17330 if (REGNO (operands[0]) == REGNO (addr))
17332 output_asm_insn ("ldr\t%H0, %2", operands);
17333 output_asm_insn ("ldr\t%0, %1", operands);
17335 else
17337 output_asm_insn ("ldr\t%0, %1", operands);
17338 output_asm_insn ("ldr\t%H0, %2", operands);
17340 break;
17342 case CONST:
17343 /* Compute <address> + 4 for the high order load. */
17344 operands[2] = adjust_address (operands[1], SImode, 4);
17346 output_asm_insn ("ldr\t%0, %1", operands);
17347 output_asm_insn ("ldr\t%H0, %2", operands);
17348 break;
17350 case PLUS:
17351 arg1 = XEXP (addr, 0);
17352 arg2 = XEXP (addr, 1);
17354 if (CONSTANT_P (arg1))
17355 base = arg2, offset = arg1;
17356 else
17357 base = arg1, offset = arg2;
17359 gcc_assert (GET_CODE (base) == REG);
17361 /* Catch the case of <address> = <reg> + <reg> */
17362 if (GET_CODE (offset) == REG)
17364 int reg_offset = REGNO (offset);
17365 int reg_base = REGNO (base);
17366 int reg_dest = REGNO (operands[0]);
17368 /* Add the base and offset registers together into the
17369 higher destination register. */
17370 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17371 reg_dest + 1, reg_base, reg_offset);
17373 /* Load the lower destination register from the address in
17374 the higher destination register. */
17375 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17376 reg_dest, reg_dest + 1);
17378 /* Load the higher destination register from its own address
17379 plus 4. */
17380 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17381 reg_dest + 1, reg_dest + 1);
17383 else
17385 /* Compute <address> + 4 for the high order load. */
17386 operands[2] = adjust_address (operands[1], SImode, 4);
17388 /* If the computed address is held in the low order register
17389 then load the high order register first, otherwise always
17390 load the low order register first. */
17391 if (REGNO (operands[0]) == REGNO (base))
17393 output_asm_insn ("ldr\t%H0, %2", operands);
17394 output_asm_insn ("ldr\t%0, %1", operands);
17396 else
17398 output_asm_insn ("ldr\t%0, %1", operands);
17399 output_asm_insn ("ldr\t%H0, %2", operands);
17402 break;
17404 case LABEL_REF:
17405 /* With no registers to worry about we can just load the value
17406 directly. */
17407 operands[2] = adjust_address (operands[1], SImode, 4);
17409 output_asm_insn ("ldr\t%H0, %2", operands);
17410 output_asm_insn ("ldr\t%0, %1", operands);
17411 break;
17413 default:
17414 gcc_unreachable ();
17417 return "";
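/* For example (register numbers illustrative), when the destination of
   the double-word load overlaps the address register, as in a load of
   r0/r1 from [r0], the REG case above emits

	ldr	r1, [r0, #4]	@ high word first; r0 still holds the address
	ldr	r0, [r0]	@ low word last, clobbering the address

   while a non-overlapping destination loads the low word first.  */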
17420 const char *
17421 thumb_output_move_mem_multiple (int n, rtx *operands)
17423 rtx tmp;
17425 switch (n)
17427 case 2:
17428 if (REGNO (operands[4]) > REGNO (operands[5]))
17430 tmp = operands[4];
17431 operands[4] = operands[5];
17432 operands[5] = tmp;
17434 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17435 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17436 break;
17438 case 3:
17439 if (REGNO (operands[4]) > REGNO (operands[5]))
17441 tmp = operands[4];
17442 operands[4] = operands[5];
17443 operands[5] = tmp;
17445 if (REGNO (operands[5]) > REGNO (operands[6]))
17447 tmp = operands[5];
17448 operands[5] = operands[6];
17449 operands[6] = tmp;
17451 if (REGNO (operands[4]) > REGNO (operands[5]))
17453 tmp = operands[4];
17454 operands[4] = operands[5];
17455 operands[5] = tmp;
17458 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17459 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17460 break;
17462 default:
17463 gcc_unreachable ();
17466 return "";
17469 /* Output a call-via instruction for thumb state. */
17470 const char *
17471 thumb_call_via_reg (rtx reg)
17473 int regno = REGNO (reg);
17474 rtx *labelp;
17476 gcc_assert (regno < LR_REGNUM);
17478 /* If we are in the normal text section we can use a single instance
17479 per compilation unit. If we are doing function sections, then we need
17480 an entry per section, since we can't rely on reachability. */
17481 if (in_section == text_section)
17483 thumb_call_reg_needed = 1;
17485 if (thumb_call_via_label[regno] == NULL)
17486 thumb_call_via_label[regno] = gen_label_rtx ();
17487 labelp = thumb_call_via_label + regno;
17489 else
17491 if (cfun->machine->call_via[regno] == NULL)
17492 cfun->machine->call_via[regno] = gen_label_rtx ();
17493 labelp = cfun->machine->call_via + regno;
17496 output_asm_insn ("bl\t%a0", labelp);
17497 return "";
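/* For example, a call through r4 from code in the text section is
   assumed to produce

	bl	.LCV		@ emitted here (label name illustrative)
	...
   .LCV:
	bx	r4		@ emitted once per file by arm_file_end  */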
17500 /* Routines for generating rtl. */
17501 void
17502 thumb_expand_movmemqi (rtx *operands)
17504 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17505 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17506 HOST_WIDE_INT len = INTVAL (operands[2]);
17507 HOST_WIDE_INT offset = 0;
17509 while (len >= 12)
17511 emit_insn (gen_movmem12b (out, in, out, in));
17512 len -= 12;
17515 if (len >= 8)
17517 emit_insn (gen_movmem8b (out, in, out, in));
17518 len -= 8;
17521 if (len >= 4)
17523 rtx reg = gen_reg_rtx (SImode);
17524 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17525 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17526 len -= 4;
17527 offset += 4;
17530 if (len >= 2)
17532 rtx reg = gen_reg_rtx (HImode);
17533 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17534 plus_constant (in, offset))));
17535 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17536 reg));
17537 len -= 2;
17538 offset += 2;
17541 if (len)
17543 rtx reg = gen_reg_rtx (QImode);
17544 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17545 plus_constant (in, offset))));
17546 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17547 reg));
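/* For example, a 23-byte copy is decomposed above as 12 + 8 + 2 + 1:
   one movmem12b, one movmem8b (both with write-back of the pointer
   registers), then a halfword move at offset 0 and a byte move at
   offset 2 from the updated pointers.  */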
17551 void
17552 thumb_reload_out_hi (rtx *operands)
17554 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17557 /* Handle reading a half-word from memory during reload. */
17558 void
17559 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17561 gcc_unreachable ();
17564 /* Return the length of a function name prefix
17565 that starts with the character 'c'. */
17566 static int
17567 arm_get_strip_length (int c)
17569 switch (c)
17571 ARM_NAME_ENCODING_LENGTHS
17572 default: return 0;
17576 /* Return a pointer to a function's name with any
17577 and all prefix encodings stripped from it. */
17578 const char *
17579 arm_strip_name_encoding (const char *name)
17581 int skip;
17583 while ((skip = arm_get_strip_length (* name)))
17584 name += skip;
17586 return name;
17589 /* If there is a '*' anywhere in the name's prefix, then
17590 emit the stripped name verbatim, otherwise prepend an
17591 underscore if leading underscores are being used. */
17592 void
17593 arm_asm_output_labelref (FILE *stream, const char *name)
17595 int skip;
17596 int verbatim = 0;
17598 while ((skip = arm_get_strip_length (* name)))
17600 verbatim |= (*name == '*');
17601 name += skip;
17604 if (verbatim)
17605 fputs (name, stream);
17606 else
17607 asm_fprintf (stream, "%U%s", name);
17610 static void
17611 arm_file_start (void)
17613 int val;
17615 if (TARGET_UNIFIED_ASM)
17616 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17618 if (TARGET_BPABI)
17620 const char *fpu_name;
17621 if (arm_select[0].string)
17622 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17623 else if (arm_select[1].string)
17624 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17625 else
17626 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17627 all_cores[arm_default_cpu].name);
17629 if (TARGET_SOFT_FLOAT)
17631 if (TARGET_VFP)
17632 fpu_name = "softvfp";
17633 else
17634 fpu_name = "softfpa";
17636 else
17638 int set_float_abi_attributes = 0;
17639 switch (arm_fpu_arch)
17641 case FPUTYPE_FPA:
17642 fpu_name = "fpa";
17643 break;
17644 case FPUTYPE_FPA_EMU2:
17645 fpu_name = "fpe2";
17646 break;
17647 case FPUTYPE_FPA_EMU3:
17648 fpu_name = "fpe3";
17649 break;
17650 case FPUTYPE_MAVERICK:
17651 fpu_name = "maverick";
17652 break;
17653 case FPUTYPE_VFP:
17654 fpu_name = "vfp";
17655 set_float_abi_attributes = 1;
17656 break;
17657 case FPUTYPE_VFP3:
17658 fpu_name = "vfp3";
17659 set_float_abi_attributes = 1;
17660 break;
17661 case FPUTYPE_NEON:
17662 fpu_name = "neon";
17663 set_float_abi_attributes = 1;
17664 break;
17665 default:
17666 abort();
17668 if (set_float_abi_attributes)
17670 if (TARGET_HARD_FLOAT)
17671 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17672 if (TARGET_HARD_FLOAT_ABI)
17673 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17676 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17678 /* Some of these attributes only apply when the corresponding features
17679 are used. However we don't have any easy way of figuring this out.
17680 Conservatively record the setting that would have been used. */
17682 /* Tag_ABI_FP_rounding. */
17683 if (flag_rounding_math)
17684 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17685 if (!flag_unsafe_math_optimizations)
17688 /* Tag_ABI_FP_denormal. */
17688 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17689 /* Tag_ABI_FP_exceptions. */
17690 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17692 /* Tag_ABI_FP_user_exceptions. */
17693 if (flag_signaling_nans)
17694 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17695 /* Tag_ABI_FP_number_model. */
17696 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17697 flag_finite_math_only ? 1 : 3);
17699 /* Tag_ABI_align8_needed. */
17700 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17701 /* Tag_ABI_align8_preserved. */
17702 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17703 /* Tag_ABI_enum_size. */
17704 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17705 flag_short_enums ? 1 : 2);
17707 /* Tag_ABI_optimization_goals. */
17708 if (optimize_size)
17709 val = 4;
17710 else if (optimize >= 2)
17711 val = 2;
17712 else if (optimize)
17713 val = 1;
17714 else
17715 val = 6;
17716 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17718 if (arm_lang_output_object_attributes_hook)
17719 arm_lang_output_object_attributes_hook();
17721 default_file_start();
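/* As an illustration, a soft-float VFP GNU/Linux EABI target compiled
   at -O2 with default FP options is assumed to get directives along
   the lines of:

	.fpu softvfp
	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal
	.eabi_attribute 21, 1	@ Tag_ABI_FP_exceptions
	.eabi_attribute 23, 3	@ Tag_ABI_FP_number_model
	.eabi_attribute 24, 1	@ Tag_ABI_align8_needed
	.eabi_attribute 25, 1	@ Tag_ABI_align8_preserved
	.eabi_attribute 26, 2	@ Tag_ABI_enum_size
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals  */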
17724 static void
17725 arm_file_end (void)
17727 int regno;
17729 if (NEED_INDICATE_EXEC_STACK)
17730 /* Add .note.GNU-stack. */
17731 file_end_indicate_exec_stack ();
17733 if (! thumb_call_reg_needed)
17734 return;
17736 switch_to_section (text_section);
17737 asm_fprintf (asm_out_file, "\t.code 16\n");
17738 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17740 for (regno = 0; regno < LR_REGNUM; regno++)
17742 rtx label = thumb_call_via_label[regno];
17744 if (label != 0)
17746 targetm.asm_out.internal_label (asm_out_file, "L",
17747 CODE_LABEL_NUMBER (label));
17748 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17753 #ifndef ARM_PE
17754 /* Symbols in the text segment can be accessed without indirecting via the
17755 constant pool; it may take an extra binary operation, but this is still
17756 faster than indirecting via memory. Don't do this when not optimizing,
17757 since we won't be calculating all of the offsets necessary to do this
17758 simplification. */
17760 static void
17761 arm_encode_section_info (tree decl, rtx rtl, int first)
17763 if (optimize > 0 && TREE_CONSTANT (decl))
17764 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17766 default_encode_section_info (decl, rtl, first);
17768 #endif /* !ARM_PE */
17770 static void
17771 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17773 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17774 && !strcmp (prefix, "L"))
17776 arm_ccfsm_state = 0;
17777 arm_target_insn = NULL;
17779 default_internal_label (stream, prefix, labelno);
17782 /* Output code to add DELTA to the first argument, and then jump
17783 to FUNCTION. Used for C++ multiple inheritance. */
17784 static void
17785 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17786 HOST_WIDE_INT delta,
17787 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17788 tree function)
17790 static int thunk_label = 0;
17791 char label[256];
17792 char labelpc[256];
17793 int mi_delta = delta;
17794 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17795 int shift = 0;
17796 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17797 ? 1 : 0);
17798 if (mi_delta < 0)
17799 mi_delta = - mi_delta;
17801 if (TARGET_THUMB1)
17803 int labelno = thunk_label++;
17804 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17805 /* Thunks are entered in ARM mode when available. */
17806 if (TARGET_THUMB1_ONLY)
17808 /* push r3 so we can use it as a temporary. */
17809 /* TODO: Omit this save if r3 is not used. */
17810 fputs ("\tpush {r3}\n", file);
17811 fputs ("\tldr\tr3, ", file);
17813 else
17815 fputs ("\tldr\tr12, ", file);
17817 assemble_name (file, label);
17818 fputc ('\n', file);
17819 if (flag_pic)
17821 /* If we are generating PIC, the ldr instruction below loads
17822 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17823 the address of the add + 8, so we have:
17825 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17826 = target + 1.
17828 Note that we have "+ 1" because some versions of GNU ld
17829 don't set the low bit of the result for R_ARM_REL32
17830 relocations against thumb function symbols.
17831 On ARMv6M this is +4, not +8. */
17832 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17833 assemble_name (file, labelpc);
17834 fputs (":\n", file);
17835 if (TARGET_THUMB1_ONLY)
17837 /* This is 2 insns after the start of the thunk, so we know it
17838 is 4-byte aligned. */
17839 fputs ("\tadd\tr3, pc, r3\n", file);
17840 fputs ("\tmov r12, r3\n", file);
17842 else
17843 fputs ("\tadd\tr12, pc, r12\n", file);
17845 else if (TARGET_THUMB1_ONLY)
17846 fputs ("\tmov r12, r3\n", file);
17848 if (TARGET_THUMB1_ONLY)
17850 if (mi_delta > 255)
17852 fputs ("\tldr\tr3, ", file);
17853 assemble_name (file, label);
17854 fputs ("+4\n", file);
17855 asm_fprintf (file, "\t%s\t%r, %r, r3\n",
17856 mi_op, this_regno, this_regno);
17858 else if (mi_delta != 0)
17860 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17861 mi_op, this_regno, this_regno,
17862 mi_delta);
17865 else
17867 /* TODO: Use movw/movt for large constants when available. */
17868 while (mi_delta != 0)
17870 if ((mi_delta & (3 << shift)) == 0)
17871 shift += 2;
17872 else
17874 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17875 mi_op, this_regno, this_regno,
17876 mi_delta & (0xff << shift));
17877 mi_delta &= ~(0xff << shift);
17878 shift += 8;
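	  /* For example, delta = 0x12345 is peeled by the loop above into
	     three add instructions: #0x45, then #0x2300, then #0x10000,
	     each a rotatable 8-bit immediate.  */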
17882 if (TARGET_THUMB1)
17884 if (TARGET_THUMB1_ONLY)
17885 fputs ("\tpop\t{r3}\n", file);
17887 fprintf (file, "\tbx\tr12\n");
17888 ASM_OUTPUT_ALIGN (file, 2);
17889 assemble_name (file, label);
17890 fputs (":\n", file);
17891 if (flag_pic)
17893 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17894 rtx tem = XEXP (DECL_RTL (function), 0);
17895 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17896 tem = gen_rtx_MINUS (GET_MODE (tem),
17897 tem,
17898 gen_rtx_SYMBOL_REF (Pmode,
17899 ggc_strdup (labelpc)));
17900 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17902 else
17903 /* Output ".word .LTHUNKn". */
17904 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17906 if (TARGET_THUMB1_ONLY && mi_delta > 255)
17907 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
17909 else
17911 fputs ("\tb\t", file);
17912 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17913 if (NEED_PLT_RELOC)
17914 fputs ("(PLT)", file);
17915 fputc ('\n', file);
17920 arm_emit_vector_const (FILE *file, rtx x)
17922 int i;
17923 const char * pattern;
17925 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17927 switch (GET_MODE (x))
17929 case V2SImode: pattern = "%08x"; break;
17930 case V4HImode: pattern = "%04x"; break;
17931 case V8QImode: pattern = "%02x"; break;
17932 default: gcc_unreachable ();
17935 fprintf (file, "0x");
17936 for (i = CONST_VECTOR_NUNITS (x); i--;)
17938 rtx element;
17940 element = CONST_VECTOR_ELT (x, i);
17941 fprintf (file, pattern, INTVAL (element));
17944 return 1;
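/* For example, a V4HImode constant vector {1, 2, 3, 4} is printed by
   the loop above highest element first, as 0x0004000300020001.  */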
17947 const char *
17948 arm_output_load_gr (rtx *operands)
17950 rtx reg;
17951 rtx offset;
17952 rtx wcgr;
17953 rtx sum;
17955 if (GET_CODE (operands [1]) != MEM
17956 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17957 || GET_CODE (reg = XEXP (sum, 0)) != REG
17958 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17959 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17960 return "wldrw%?\t%0, %1";
17962 /* Fix up an out-of-range load of a GR register. */
17963 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17964 wcgr = operands[0];
17965 operands[0] = reg;
17966 output_asm_insn ("ldr%?\t%0, %1", operands);
17968 operands[0] = wcgr;
17969 operands[1] = reg;
17970 output_asm_insn ("tmcr%?\t%0, %1", operands);
17971 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
17973 return "";
17976 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
17978 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
17979 named arg and all anonymous args onto the stack.
17980 XXX I know the prologue shouldn't be pushing registers, but it is faster
17981 that way. */
17983 static void
17984 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
17985 enum machine_mode mode,
17986 tree type,
17987 int *pretend_size,
17988 int second_time ATTRIBUTE_UNUSED)
17990 int nregs = cum->nregs;
17991 if (nregs & 1
17992 && ARM_DOUBLEWORD_ALIGN
17993 && arm_needs_doubleword_align (mode, type))
17994 nregs++;
17996 cfun->machine->uses_anonymous_args = 1;
17997 if (nregs < NUM_ARG_REGS)
17998 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
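/* For example, for "int f (int a, ...)" the single named argument uses
   r0, so nregs is 1 and *pretend_size becomes 12: the prologue then
   pushes the anonymous-argument registers r1-r3.  */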
18001 /* Return nonzero if the CONSUMER instruction (a store) does not need
18002 PRODUCER's value to calculate the address. */
18005 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
18007 rtx value = PATTERN (producer);
18008 rtx addr = PATTERN (consumer);
18010 if (GET_CODE (value) == COND_EXEC)
18011 value = COND_EXEC_CODE (value);
18012 if (GET_CODE (value) == PARALLEL)
18013 value = XVECEXP (value, 0, 0);
18014 value = XEXP (value, 0);
18015 if (GET_CODE (addr) == COND_EXEC)
18016 addr = COND_EXEC_CODE (addr);
18017 if (GET_CODE (addr) == PARALLEL)
18018 addr = XVECEXP (addr, 0, 0);
18019 addr = XEXP (addr, 0);
18021 return !reg_overlap_mentioned_p (value, addr);
18024 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18025 have an early register shift value or amount dependency on the
18026 result of PRODUCER. */
18029 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
18031 rtx value = PATTERN (producer);
18032 rtx op = PATTERN (consumer);
18033 rtx early_op;
18035 if (GET_CODE (value) == COND_EXEC)
18036 value = COND_EXEC_CODE (value);
18037 if (GET_CODE (value) == PARALLEL)
18038 value = XVECEXP (value, 0, 0);
18039 value = XEXP (value, 0);
18040 if (GET_CODE (op) == COND_EXEC)
18041 op = COND_EXEC_CODE (op);
18042 if (GET_CODE (op) == PARALLEL)
18043 op = XVECEXP (op, 0, 0);
18044 op = XEXP (op, 1);
18046 early_op = XEXP (op, 0);
18047 /* This is either an actual independent shift, or a shift applied to
18048 the first operand of another operation. We want the whole shift
18049 operation. */
18050 if (GET_CODE (early_op) == REG)
18051 early_op = op;
18053 return !reg_overlap_mentioned_p (value, early_op);
18056 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
18057 have an early register shift value dependency on the result of
18058 PRODUCER. */
18061 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
18063 rtx value = PATTERN (producer);
18064 rtx op = PATTERN (consumer);
18065 rtx early_op;
18067 if (GET_CODE (value) == COND_EXEC)
18068 value = COND_EXEC_CODE (value);
18069 if (GET_CODE (value) == PARALLEL)
18070 value = XVECEXP (value, 0, 0);
18071 value = XEXP (value, 0);
18072 if (GET_CODE (op) == COND_EXEC)
18073 op = COND_EXEC_CODE (op);
18074 if (GET_CODE (op) == PARALLEL)
18075 op = XVECEXP (op, 0, 0);
18076 op = XEXP (op, 1);
18078 early_op = XEXP (op, 0);
18080 /* This is either an actual independent shift, or a shift applied to
18081 the first operand of another operation. We want the value being
18082 shifted, in either case. */
18083 if (GET_CODE (early_op) != REG)
18084 early_op = XEXP (early_op, 0);
18086 return !reg_overlap_mentioned_p (value, early_op);
18089 /* Return nonzero if the CONSUMER (a mul or mac op) does not
18090 have an early register mult dependency on the result of
18091 PRODUCER. */
18094 arm_no_early_mul_dep (rtx producer, rtx consumer)
18096 rtx value = PATTERN (producer);
18097 rtx op = PATTERN (consumer);
18099 if (GET_CODE (value) == COND_EXEC)
18100 value = COND_EXEC_CODE (value);
18101 if (GET_CODE (value) == PARALLEL)
18102 value = XVECEXP (value, 0, 0);
18103 value = XEXP (value, 0);
18104 if (GET_CODE (op) == COND_EXEC)
18105 op = COND_EXEC_CODE (op);
18106 if (GET_CODE (op) == PARALLEL)
18107 op = XVECEXP (op, 0, 0);
18108 op = XEXP (op, 1);
18110 return (GET_CODE (op) == PLUS
18111 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
18114 /* We can't rely on the caller doing the proper promotion when
18115 using APCS or ATPCS. */
18117 static bool
18118 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
18120 return !TARGET_AAPCS_BASED;
18124 /* AAPCS based ABIs use short enums by default. */
18126 static bool
18127 arm_default_short_enums (void)
18129 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
18133 /* AAPCS requires that anonymous bitfields affect structure alignment. */
18135 static bool
18136 arm_align_anon_bitfield (void)
18138 return TARGET_AAPCS_BASED;
18142 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
18144 static tree
18145 arm_cxx_guard_type (void)
18147 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
18150 /* Return non-zero if the consumer (a multiply-accumulate instruction)
18151 has an accumulator dependency on the result of the producer (a
18152 multiplication instruction) and no other dependency on that result. */
18154 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
18156 rtx mul = PATTERN (producer);
18157 rtx mac = PATTERN (consumer);
18158 rtx mul_result;
18159 rtx mac_op0, mac_op1, mac_acc;
18161 if (GET_CODE (mul) == COND_EXEC)
18162 mul = COND_EXEC_CODE (mul);
18163 if (GET_CODE (mac) == COND_EXEC)
18164 mac = COND_EXEC_CODE (mac);
18166 /* Check that mul is of the form (set (...) (mult ...))
18167 and mla is of the form (set (...) (plus (mult ...) (...))). */
18168 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
18169 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
18170 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
18171 return 0;
18173 mul_result = XEXP (mul, 0);
18174 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
18175 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
18176 mac_acc = XEXP (XEXP (mac, 1), 1);
18178 return (reg_overlap_mentioned_p (mul_result, mac_acc)
18179 && !reg_overlap_mentioned_p (mul_result, mac_op0)
18180 && !reg_overlap_mentioned_p (mul_result, mac_op1));
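/* For example (registers illustrative), with the producer
	(set (reg r1) (mult (reg r2) (reg r3)))
   and the consumer
	(set (reg r4) (plus (mult (reg r5) (reg r6)) (reg r1)))
   the test above returns nonzero, since r1 feeds only the accumulator
   operand of the multiply-accumulate.  */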
18184 /* The EABI says test the least significant bit of a guard variable. */
18186 static bool
18187 arm_cxx_guard_mask_bit (void)
18189 return TARGET_AAPCS_BASED;
18193 /* The EABI specifies that all array cookies are 8 bytes long. */
18195 static tree
18196 arm_get_cookie_size (tree type)
18198 tree size;
18200 if (!TARGET_AAPCS_BASED)
18201 return default_cxx_get_cookie_size (type);
18203 size = build_int_cst (sizetype, 8);
18204 return size;
18208 /* The EABI says that array cookies should also contain the element size. */
18210 static bool
18211 arm_cookie_has_size (void)
18213 return TARGET_AAPCS_BASED;
18217 /* The EABI says constructors and destructors should return a pointer to
18218 the object constructed/destroyed. */
18220 static bool
18221 arm_cxx_cdtor_returns_this (void)
18223 return TARGET_AAPCS_BASED;
18226 /* The EABI says that an inline function may never be the key
18227 method. */
18229 static bool
18230 arm_cxx_key_method_may_be_inline (void)
18232 return !TARGET_AAPCS_BASED;
18235 static void
18236 arm_cxx_determine_class_data_visibility (tree decl)
18238 if (!TARGET_AAPCS_BASED)
18239 return;
18241 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18242 is exported. However, on systems without dynamic vague linkage,
18243 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18244 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18245 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18246 else
18247 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18248 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18251 static bool
18252 arm_cxx_class_data_always_comdat (void)
18254 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18255 vague linkage if the class has no key function. */
18256 return !TARGET_AAPCS_BASED;
18260 /* The EABI says __aeabi_atexit should be used to register static
18261 destructors. */
18263 static bool
18264 arm_cxx_use_aeabi_atexit (void)
18266 return TARGET_AAPCS_BASED;
18270 void
18271 arm_set_return_address (rtx source, rtx scratch)
18273 arm_stack_offsets *offsets;
18274 HOST_WIDE_INT delta;
18275 rtx addr;
18276 unsigned long saved_regs;
18278 offsets = arm_get_frame_offsets ();
18279 saved_regs = offsets->saved_regs_mask;
18281 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18282 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18283 else
18285 if (frame_pointer_needed)
18286 addr = plus_constant(hard_frame_pointer_rtx, -4);
18287 else
18289 /* LR will be the first saved register. */
18290 delta = offsets->outgoing_args - (offsets->frame + 4);
18293 if (delta >= 4096)
18295 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18296 GEN_INT (delta & ~4095)));
18297 addr = scratch;
18298 delta &= 4095;
18300 else
18301 addr = stack_pointer_rtx;
18303 addr = plus_constant (addr, delta);
18305 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18310 void
18311 thumb_set_return_address (rtx source, rtx scratch)
18313 arm_stack_offsets *offsets;
18314 HOST_WIDE_INT delta;
18315 HOST_WIDE_INT limit;
18316 int reg;
18317 rtx addr;
18318 unsigned long mask;
18320 emit_insn (gen_rtx_USE (VOIDmode, source));
18322 offsets = arm_get_frame_offsets ();
18323 mask = offsets->saved_regs_mask;
18324 if (mask & (1 << LR_REGNUM))
18326 limit = 1024;
18327 /* Find the saved regs. */
18328 if (frame_pointer_needed)
18330 delta = offsets->soft_frame - offsets->saved_args;
18331 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18332 if (TARGET_THUMB1)
18333 limit = 128;
18335 else
18337 delta = offsets->outgoing_args - offsets->saved_args;
18338 reg = SP_REGNUM;
18340 /* Allow for the stack frame. */
18341 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18342 delta -= 16;
18343 /* The link register is always the first saved register. */
18344 delta -= 4;
18346 /* Construct the address. */
18347 addr = gen_rtx_REG (SImode, reg);
18348 if (delta > limit)
18350 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18351 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18352 addr = scratch;
18354 else
18355 addr = plus_constant (addr, delta);
18357 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18359 else
18360 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18363 /* Implements target hook vector_mode_supported_p. */
18364 bool
18365 arm_vector_mode_supported_p (enum machine_mode mode)
18367 /* Neon also supports V2SImode, etc. listed in the clause below. */
18368 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18369 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18370 return true;
18372 if ((mode == V2SImode)
18373 || (mode == V4HImode)
18374 || (mode == V8QImode))
18375 return true;
18377 return false;
18380 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18381 ARM insns and therefore guarantee that the shift count is modulo 256.
18382 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18383 guarantee no particular behavior for out-of-range counts. */
18385 static unsigned HOST_WIDE_INT
18386 arm_shift_truncation_mask (enum machine_mode mode)
18388 return mode == SImode ? 255 : 0;
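/* Returning 255 for SImode lets the optimizers drop a redundant
   "& 255" applied to a shift count, e.g. fold (x << (n & 255)) to
   (x << n), matching the shift-by-register behavior noted above.  */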
18392 /* Map internal gcc register numbers to DWARF2 register numbers. */
18394 unsigned int
18395 arm_dbx_register_number (unsigned int regno)
18397 if (regno < 16)
18398 return regno;
18400 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18401 compatibility. The EABI defines them as registers 96-103. */
18402 if (IS_FPA_REGNUM (regno))
18403 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18405 /* FIXME: VFPv3 register numbering. */
18406 if (IS_VFP_REGNUM (regno))
18407 return 64 + regno - FIRST_VFP_REGNUM;
18409 if (IS_IWMMXT_GR_REGNUM (regno))
18410 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18412 if (IS_IWMMXT_REGNUM (regno))
18413 return 112 + regno - FIRST_IWMMXT_REGNUM;
18415 gcc_unreachable ();
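/* For example, the mapping above leaves r0-r15 unchanged, sends the
   first VFP register to 64, the first FPA register to 16 (96 for
   AAPCS-based targets), the first iWMMXt GR register to 104 and the
   first iWMMXt register to 112.  */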
18419 #ifdef TARGET_UNWIND_INFO
18420 /* Emit unwind directives for a store-multiple instruction or stack pointer
18421 push during alignment.
18422 These should only ever be generated by the function prologue code, so
18423 expect them to have a particular form. */
18425 static void
18426 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18428 int i;
18429 HOST_WIDE_INT offset;
18430 HOST_WIDE_INT nregs;
18431 int reg_size;
18432 unsigned reg;
18433 unsigned lastreg;
18434 rtx e;
18436 e = XVECEXP (p, 0, 0);
18437 if (GET_CODE (e) != SET)
18438 abort ();
18440 /* First insn will adjust the stack pointer. */
18441 if (GET_CODE (e) != SET
18442 || GET_CODE (XEXP (e, 0)) != REG
18443 || REGNO (XEXP (e, 0)) != SP_REGNUM
18444 || GET_CODE (XEXP (e, 1)) != PLUS)
18445 abort ();
18447 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18448 nregs = XVECLEN (p, 0) - 1;
18450 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18451 if (reg < 16)
18453 /* The function prologue may also push pc, but not annotate it as it is
18454 never restored. We turn this into a stack pointer adjustment. */
18455 if (nregs * 4 == offset - 4)
18457 fprintf (asm_out_file, "\t.pad #4\n");
18458 offset -= 4;
18460 reg_size = 4;
18461 fprintf (asm_out_file, "\t.save {");
18463 else if (IS_VFP_REGNUM (reg))
18465 reg_size = 8;
18466 fprintf (asm_out_file, "\t.vsave {");
18468 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18470 /* FPA registers are done differently. */
18471 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18472 return;
18474 else
18475 /* Unknown register type. */
18476 abort ();
18478 /* If the stack increment doesn't match the size of the saved registers,
18479 something has gone horribly wrong. */
18480 if (offset != nregs * reg_size)
18481 abort ();
18483 offset = 0;
18484 lastreg = 0;
18485 /* The remaining insns will describe the stores. */
18486 for (i = 1; i <= nregs; i++)
18488 /* Expect (set (mem <addr>) (reg)).
18489 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18490 e = XVECEXP (p, 0, i);
18491 if (GET_CODE (e) != SET
18492 || GET_CODE (XEXP (e, 0)) != MEM
18493 || GET_CODE (XEXP (e, 1)) != REG)
18494 abort ();
18496 reg = REGNO (XEXP (e, 1));
18497 if (reg < lastreg)
18498 abort ();
18500 if (i != 1)
18501 fprintf (asm_out_file, ", ");
18502 /* We can't use %r for vfp because we need to use the
18503 double precision register names. */
18504 if (IS_VFP_REGNUM (reg))
18505 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18506 else
18507 asm_fprintf (asm_out_file, "%r", reg);
18509 #ifdef ENABLE_CHECKING
18510 /* Check that the addresses are consecutive. */
18511 e = XEXP (XEXP (e, 0), 0);
18512 if (GET_CODE (e) == PLUS)
18514 offset += reg_size;
18515 if (GET_CODE (XEXP (e, 0)) != REG
18516 || REGNO (XEXP (e, 0)) != SP_REGNUM
18517 || GET_CODE (XEXP (e, 1)) != CONST_INT
18518 || offset != INTVAL (XEXP (e, 1)))
18519 abort ();
18521 else if (i != 1
18522 || GET_CODE (e) != REG
18523 || REGNO (e) != SP_REGNUM)
18524 abort ();
18525 #endif
18527 fprintf (asm_out_file, "}\n");
18530 /* Emit unwind directives for a SET. */
18532 static void
18533 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18535 rtx e0;
18536 rtx e1;
18537 unsigned reg;
18539 e0 = XEXP (p, 0);
18540 e1 = XEXP (p, 1);
18541 switch (GET_CODE (e0))
18543 case MEM:
18544 /* Pushing a single register. */
18545 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18546 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18547 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18548 abort ();
18550 asm_fprintf (asm_out_file, "\t.save ");
18551 if (IS_VFP_REGNUM (REGNO (e1)))
18552 asm_fprintf(asm_out_file, "{d%d}\n",
18553 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18554 else
18555 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18556 break;
18558 case REG:
18559 if (REGNO (e0) == SP_REGNUM)
18561 /* A stack increment. */
18562 if (GET_CODE (e1) != PLUS
18563 || GET_CODE (XEXP (e1, 0)) != REG
18564 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18565 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18566 abort ();
18568 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18569 -INTVAL (XEXP (e1, 1)));
18571 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18573 HOST_WIDE_INT offset;
18575 if (GET_CODE (e1) == PLUS)
18577 if (GET_CODE (XEXP (e1, 0)) != REG
18578 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18579 abort ();
18580 reg = REGNO (XEXP (e1, 0));
18581 offset = INTVAL (XEXP (e1, 1));
18582 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18583 HARD_FRAME_POINTER_REGNUM, reg,
18584 INTVAL (XEXP (e1, 1)));
18586 else if (GET_CODE (e1) == REG)
18588 reg = REGNO (e1);
18589 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18590 HARD_FRAME_POINTER_REGNUM, reg);
18592 else
18593 abort ();
18595 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18597 /* Move from sp to reg. */
18598 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18600 else if (GET_CODE (e1) == PLUS
18601 && GET_CODE (XEXP (e1, 0)) == REG
18602 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18603 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18605 /* Set reg to offset from sp. */
18606 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18607 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18609 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18611 /* Stack pointer save before alignment. */
18612 reg = REGNO (e0);
18613 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18614 reg + 0x90, reg);
18616 else
18617 abort ();
18618 break;
18620 default:
18621 abort ();
18626 /* Emit unwind directives for the given insn. */
18628 static void
18629 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18631 rtx pat;
18633 if (!ARM_EABI_UNWIND_TABLES)
18634 return;
18636 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
18637 && (TREE_NOTHROW (current_function_decl)
18638 || cfun->all_throwers_are_sibcalls))
18639 return;
18641 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18642 return;
18644 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18645 if (pat)
18646 pat = XEXP (pat, 0);
18647 else
18648 pat = PATTERN (insn);
18650 switch (GET_CODE (pat))
18652 case SET:
18653 arm_unwind_emit_set (asm_out_file, pat);
18654 break;
18656 case SEQUENCE:
18657 /* Store multiple. */
18658 arm_unwind_emit_sequence (asm_out_file, pat);
18659 break;
18661 default:
18662 abort();
18667 /* Output a reference from a function exception table to the type_info
18668 object X. The EABI specifies that the symbol should be relocated by
18669 an R_ARM_TARGET2 relocation. */
18671 static bool
18672 arm_output_ttype (rtx x)
18674 fputs ("\t.word\t", asm_out_file);
18675 output_addr_const (asm_out_file, x);
18676 /* Use special relocations for symbol references. */
18677 if (GET_CODE (x) != CONST_INT)
18678 fputs ("(TARGET2)", asm_out_file);
18679 fputc ('\n', asm_out_file);
18681 return TRUE;
18683 #endif /* TARGET_UNWIND_INFO */
18686 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18687 stack alignment. */
18689 static void
18690 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18692 rtx unspec = SET_SRC (pattern);
18693 gcc_assert (GET_CODE (unspec) == UNSPEC);
18695 switch (index)
18697 case UNSPEC_STACK_ALIGN:
18698 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18699 put anything on the stack, so hopefully it won't matter.
18700 CFA = SP will be correct after alignment. */
18701 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18702 SET_DEST (pattern));
18703 break;
18704 default:
18705 gcc_unreachable ();
18710 /* Output unwind directives for the start/end of a function. */
18712 void
18713 arm_output_fn_unwind (FILE * f, bool prologue)
18715 if (!ARM_EABI_UNWIND_TABLES)
18716 return;
18718 if (prologue)
18719 fputs ("\t.fnstart\n", f);
18720 else
18722 /* If this function will never be unwound, then mark it as such.
18723 The same condition is used in arm_unwind_emit to suppress
18724 the frame annotations. */
18725 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
18726 && (TREE_NOTHROW (current_function_decl)
18727 || cfun->all_throwers_are_sibcalls))
18728 fputs("\t.cantunwind\n", f);
18730 fputs ("\t.fnend\n", f);
18734 static bool
18735 arm_emit_tls_decoration (FILE *fp, rtx x)
18737 enum tls_reloc reloc;
18738 rtx val;
18740 val = XVECEXP (x, 0, 0);
18741 reloc = INTVAL (XVECEXP (x, 0, 1));
18743 output_addr_const (fp, val);
18745 switch (reloc)
18747 case TLS_GD32:
18748 fputs ("(tlsgd)", fp);
18749 break;
18750 case TLS_LDM32:
18751 fputs ("(tlsldm)", fp);
18752 break;
18753 case TLS_LDO32:
18754 fputs ("(tlsldo)", fp);
18755 break;
18756 case TLS_IE32:
18757 fputs ("(gottpoff)", fp);
18758 break;
18759 case TLS_LE32:
18760 fputs ("(tpoff)", fp);
18761 break;
18762 default:
18763 gcc_unreachable ();
18766 switch (reloc)
18768 case TLS_GD32:
18769 case TLS_LDM32:
18770 case TLS_IE32:
18771 fputs (" + (. - ", fp);
18772 output_addr_const (fp, XVECEXP (x, 0, 2));
18773 fputs (" - ", fp);
18774 output_addr_const (fp, XVECEXP (x, 0, 3));
18775 fputc (')', fp);
18776 break;
18777 default:
18778 break;
18781 return TRUE;
18784 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18786 static void
18787 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18789 gcc_assert (size == 4);
18790 fputs ("\t.word\t", file);
18791 output_addr_const (file, x);
18792 fputs ("(tlsldo)", file);
18795 bool
18796 arm_output_addr_const_extra (FILE *fp, rtx x)
18798 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18799 return arm_emit_tls_decoration (fp, x);
18800 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18802 char label[256];
18803 int labelno = INTVAL (XVECEXP (x, 0, 0));
18805 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18806 assemble_name_raw (fp, label);
18808 return TRUE;
18810 else if (GET_CODE (x) == CONST_VECTOR)
18811 return arm_emit_vector_const (fp, x);
18813 return FALSE;
18816 /* Output assembly for a shift instruction.
18817 SET_FLAGS determines how the instruction modifies the condition codes.
18818 0 - Do not set condition codes.
18819 1 - Set condition codes.
18820 2 - Use smallest instruction. */
18821 const char *
18822 arm_output_shift(rtx * operands, int set_flags)
18824 char pattern[100];
18825 static const char flag_chars[3] = {'?', '.', '!'};
18826 const char *shift;
18827 HOST_WIDE_INT val;
18828 char c;
18830 c = flag_chars[set_flags];
18831 if (TARGET_UNIFIED_ASM)
18833 shift = shift_op(operands[3], &val);
18834 if (shift)
18836 if (val != -1)
18837 operands[2] = GEN_INT(val);
18838 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18840 else
18841 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18843 else
18844 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18845 output_asm_insn (pattern, operands);
18846 return "";
18849 /* Output a Thumb-2 casesi instruction. */
18850 const char *
18851 thumb2_output_casesi (rtx *operands)
18853 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18855 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18857 output_asm_insn ("cmp\t%0, %1", operands);
18858 output_asm_insn ("bhi\t%l3", operands);
18859 switch (GET_MODE(diff_vec))
18861 case QImode:
18862 return "tbb\t[%|pc, %0]";
18863 case HImode:
18864 return "tbh\t[%|pc, %0, lsl #1]";
18865 case SImode:
18866 if (flag_pic)
18868 output_asm_insn ("adr\t%4, %l2", operands);
18869 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18870 output_asm_insn ("add\t%4, %4, %5", operands);
18871 return "bx\t%4";
18873 else
18875 output_asm_insn ("adr\t%4, %l2", operands);
18876 return "ldr\t%|pc, [%4, %0, lsl #2]";
18878 default:
18879 gcc_unreachable ();
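/* For example, a casesi whose dispatch table fits in bytes is emitted
   by the routine above as (registers illustrative)
	cmp	r0, r1		@ index against bound
	bhi	.Ldefault
	tbb	[pc, r0]
   with the ADDR_DIFF_VEC table following the tbb instruction.  */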
18883 /* Most ARM cores are single issue, but some newer ones can dual issue.
18884 The scheduler descriptions rely on this being correct. */
18885 static int
18886 arm_issue_rate (void)
18888 switch (arm_tune)
18890 case cortexr4:
18891 case cortexa8:
18892 return 2;
18894 default:
18895 return 1;
18899 /* A table and a function to perform ARM-specific name mangling for
18900 NEON vector types in order to conform to the AAPCS (see "Procedure
18901 Call Standard for the ARM Architecture", Appendix A). To qualify
18902 for emission with the mangled names defined in that document, a
18903 vector type must not only be of the correct mode but also be
18904 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18905 typedef struct
18907 enum machine_mode mode;
18908 const char *element_type_name;
18909 const char *aapcs_name;
18910 } arm_mangle_map_entry;
18912 static arm_mangle_map_entry arm_mangle_map[] = {
18913 /* 64-bit containerized types. */
18914 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18915 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18916 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18917 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18918 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18919 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18920 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18921 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18922 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18923 /* 128-bit containerized types. */
18924 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18925 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18926 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18927 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18928 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18929 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18930 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18931 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18932 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18933 { VOIDmode, NULL, NULL }
18936 const char *
18937 arm_mangle_type (const_tree type)
18939 arm_mangle_map_entry *pos = arm_mangle_map;
18941 if (TREE_CODE (type) != VECTOR_TYPE)
18942 return NULL;
18944 /* Check the mode of the vector type, and the name of the vector
18945 element type, against the table. */
18946 while (pos->mode != VOIDmode)
18948 tree elt_type = TREE_TYPE (type);
18950 if (pos->mode == TYPE_MODE (type)
18951 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18952 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18953 pos->element_type_name))
18954 return pos->aapcs_name;
18956 pos++;
18959 /* Use the default mangling for unrecognized (possibly user-defined)
18960 vector types. */
18961 return NULL;
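/* For example, assuming the usual Itanium C++ mangling prefix, a
   function "void f (int8x8_t)" whose parameter is a V8QImode vector of
   __builtin_neon_qi elements would mangle as _Z1f15__simd64_int8_t
   according to the table above.  */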
18964 #include "gt-arm.h"