/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"
#include "df.h"
/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

const struct attribute_spec arm_attribute_table[];

void (*arm_lang_output_object_attributes_hook)(void);
/* Forward function declarations.  */
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
                             HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned long thumb1_compute_save_reg_mask (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
                                           int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
                               rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
                                 tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static int arm_address_cost (rtx);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx emit_set_insn (rtx, rtx);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
                                  tree, bool);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);

static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
                                        tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
                                   enum machine_mode, const_tree, bool);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
#ifdef TARGET_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx);
static bool arm_output_ttype (rtx);
#endif
static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
static void arm_target_help (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
static bool arm_cannot_copy_insn_p (rtx);
static bool arm_tls_symbol_p (rtx x);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef  TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option
#undef  TARGET_HELP
#define TARGET_HELP arm_target_help

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef  TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

/* This will be overridden in arm_override_options.  */
#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef  TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef  TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef  TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef  TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
#undef  TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
#undef  TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef  TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef  TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef  TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef  TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef  TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef  TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef  TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef  TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef  TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef  TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef  TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef  TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef  TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef  TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef  TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef  TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#ifdef TARGET_UNWIND_INFO
#undef  TARGET_UNWIND_EMIT
#define TARGET_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef  TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef  TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true
#endif /* TARGET_UNWIND_INFO */

#undef  TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef  TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef  TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef  TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#ifdef HAVE_AS_TLS
#undef  TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif
struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE *asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* Define the information needed to generate branch insns.  This is
   stored from the compare operation.  */
rtx arm_compare_op0, arm_compare_op1;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* The default processor used if not overridden by commandline.  */
static enum processor_type arm_default_cpu = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Which thread pointer model to use.  */
enum arm_tp_type target_thread_pointer = TP_AUTO;

/* Used to parse -mstructure_size_boundary command line option.  */
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)   /* Has external co-processor bus.  */
#define FL_ARCH3M     (1 << 1)   /* Extended multiply.  */
#define FL_MODE26     (1 << 2)   /* 26-bit mode support.  */
#define FL_MODE32     (1 << 3)   /* 32-bit mode support.  */
#define FL_ARCH4      (1 << 4)   /* Architecture rel 4.  */
#define FL_ARCH5      (1 << 5)   /* Architecture rel 5.  */
#define FL_THUMB      (1 << 6)   /* Thumb aware.  */
#define FL_LDSCHED    (1 << 7)   /* Load scheduling necessary.  */
#define FL_STRONG     (1 << 8)   /* StrongARM.  */
#define FL_ARCH5E     (1 << 9)   /* DSP extensions to v5.  */
#define FL_XSCALE     (1 << 10)  /* XScale.  */
#define FL_CIRRUS     (1 << 11)  /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)  /* Architecture rel 6.  Adds
                                    media instructions.  */
#define FL_VFPV2      (1 << 13)  /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)  /* Schedule for write buffer ops.
                                    Note: ARM6 & 7 derivatives only.  */
#define FL_ARCH6K     (1 << 15)  /* Architecture rel 6 K extensions.  */
#define FL_THUMB2     (1 << 16)  /* Thumb-2.  */
#define FL_NOTM       (1 << 17)  /* Instructions not present in the 'M'
                                    profile.  */
#define FL_DIV        (1 << 18)  /* Hardware divide.  */
#define FL_VFPV3      (1 << 19)  /* Vector Floating Point V3.  */
#define FL_NEON       (1 << 20)  /* Neon instructions.  */

#define FL_IWMMXT     (1 << 29)  /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2      FL_NOTM
#define FL_FOR_ARCH3      (FL_FOR_ARCH2 | FL_MODE32)
#define FL_FOR_ARCH3M     (FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4      (FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T     (FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5      (FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T     (FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E     (FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE    (FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ   FL_FOR_ARCH5TE
#define FL_FOR_ARCH6      (FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J     FL_FOR_ARCH6
#define FL_FOR_ARCH6K     (FL_FOR_ARCH6 | FL_ARCH6K)
#define FL_FOR_ARCH6Z     FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK    FL_FOR_ARCH6K
#define FL_FOR_ARCH6T2    (FL_FOR_ARCH6 | FL_THUMB2)
#define FL_FOR_ARCH7      (FL_FOR_ARCH6T2 &~ FL_NOTM)
#define FL_FOR_ARCH7A     (FL_FOR_ARCH7 | FL_NOTM)
#define FL_FOR_ARCH7R     (FL_FOR_ARCH7A | FL_DIV)
#define FL_FOR_ARCH7M     (FL_FOR_ARCH7 | FL_DIV)
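/* As a worked example of the composition above, FL_FOR_ARCH5TE expands
   to (FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E
   | FL_THUMB): each architecture adds its new capabilities to those of
   its predecessors.  */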
/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_hwdiv;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* Set to 1 when a return insn is output, this means that the epilogue
   is not needed.  */
int return_used_this_function;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
                                   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
                                   | (1 << PIC_OFFSET_TABLE_REGNUM)))
/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, int, int, int *);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};
static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC | FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC | FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC | FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC | FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC | FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC | FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC | FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
  {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
  {"armv7",   cortexa8,   "7",   FL_CO_PROC | FL_FOR_ARCH7, NULL},
  {"armv7-a", cortexa8,   "7A",  FL_CO_PROC | FL_FOR_ARCH7A, NULL},
  {"armv7-r", cortexr4,   "7R",  FL_CO_PROC | FL_FOR_ARCH7R, NULL},
  {"armv7-m", cortexm3,   "7M",  FL_CO_PROC | FL_FOR_ARCH7M, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT, NULL},
  {NULL, arm_none, NULL, 0, NULL}
};
struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string     name            processors  */
  { NULL,       "-mcpu=",       all_cores  },
  { NULL,       "-march=",      all_architectures },
  { NULL,       "-mtune=",      all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";
struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};

/* Available values for -mfpu=.  */

static const struct fpu_desc all_fpus[] =
{
  {"fpa",       FPUTYPE_FPA},
  {"fpe2",      FPUTYPE_FPA_EMU2},
  {"fpe3",      FPUTYPE_FPA_EMU3},
  {"maverick",  FPUTYPE_MAVERICK},
  {"vfp",       FPUTYPE_VFP},
  {"vfp3",      FPUTYPE_VFP3},
  {"neon",      FPUTYPE_NEON}
};
/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum fputype fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,         /* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,             /* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,        /* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP  */
  ARM_FP_MODEL_VFP,             /* FPUTYPE_VFP3  */
  ARM_FP_MODEL_VFP              /* FPUTYPE_NEON  */
};
struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};

/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",      ARM_FLOAT_ABI_SOFT},
  {"softfp",    ARM_FLOAT_ABI_SOFTFP},
  {"hard",      ARM_FLOAT_ABI_HARD}
};

struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};

/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",    ARM_ABI_APCS},
  {"atpcs",       ARM_ABI_ATPCS},
  {"aapcs",       ARM_ABI_AAPCS},
  {"iwmmxt",      ARM_ABI_IWMMXT},
  {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
};
/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_LDM32,
  TLS_LDO32,
  TLS_IE32,
  TLS_LE32
};
/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
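/* For example, emit_set_insn (reg, GEN_INT (42)) emits the single
   insn (set (reg) (const_int 42)) into the current sequence.  */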
/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
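/* For example, bit_count (0x29) == 3: 0x29 is 101001 in binary, and the
   loop above clears bits 0, 3 and 5 in successive iterations.  */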
/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
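  /* For example, a call to __aeabi_idivmod with r0 = 7 and r1 = 3
     returns the quotient 2 in r0 and the remainder 1 in r1; a caller
     that only wants the quotient simply ignores r1.  */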
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}
static void
arm_target_help (void)
{
  int i;
  static int columns = 0;
  int remaining;

  /* If we have not done so already, obtain the desired maximum width of
     the output.  Note - this is a duplication of the code at the start of
     gcc/opts.c:print_specific_help() - the two copies should probably be
     replaced by a single function.  */
  if (columns == 0)
    {
      const char *p;

      GET_ENVIRONMENT (p, "COLUMNS");
      if (p != NULL)
        {
          int value = atoi (p);

          if (value > 0)
            columns = value;
        }

      if (columns == 0)
        /* Use a reasonable default.  */
        columns = 80;
    }

  printf ("  Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");

  /* The - 2 is because we know that the last entry in the array is NULL.  */
  i = ARRAY_SIZE (all_cores) - 2;
  gcc_assert (i > 0);
  printf ("    %s", all_cores[i].name);
  remaining = columns - (strlen (all_cores[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_cores[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_cores[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_cores[i].name);
          remaining = columns - (len + 4);
        }
    }

  printf ("\n\n  Known ARM architectures (for use with the -march= option):\n");

  i = ARRAY_SIZE (all_architectures) - 2;
  gcc_assert (i > 0);

  printf ("    %s", all_architectures[i].name);
  remaining = columns - (strlen (all_architectures[i].name) + 4);
  gcc_assert (remaining >= 0);

  while (i--)
    {
      int len = strlen (all_architectures[i].name);

      if (remaining > len + 2)
        {
          printf (", %s", all_architectures[i].name);
          remaining -= len + 2;
        }
      else
        {
          if (remaining > 0)
            printf (",");
          printf ("\n    %s", all_architectures[i].name);
          remaining = columns - (len + 4);
        }
    }
  printf ("\n");
}
/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
        {
          const struct processors * sel;

          for (sel = ptr->processors; sel->name != NULL; sel++)
            if (streq (ptr->string, sel->name))
              {
                /* Set the architecture define.  */
                if (i != ARM_OPT_SET_TUNE)
                  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

                /* Determine the processor core for which we should
                   tune code-generation.  */
                if (/* -mcpu= is a sensible default.  */
                    i == ARM_OPT_SET_CPU
                    /* -mtune= overrides -mcpu= and -march=.  */
                    || i == ARM_OPT_SET_TUNE)
                  arm_tune = (enum processor_type) (sel - ptr->processors);

                /* Remember the CPU associated with this architecture.
                   If no other option is used to set the CPU type,
                   we'll use this to guess the most suitable tuning
                   options.  */
                if (i == ARM_OPT_SET_ARCH)
                  target_arch_cpu = sel->core;

                if (i != ARM_OPT_SET_TUNE)
                  {
                    /* If we have been given an architecture and a processor
                       make sure that they are compatible.  We only generate
                       a warning though, and we prefer the CPU over the
                       architecture.  */
                    if (insn_flags != 0 && (insn_flags ^ sel->flags))
                      warning (0, "switch -mcpu=%s conflicts with -march= switch",
                               ptr->string);

                    insn_flags = sel->flags;
                  }

                break;
              }

          if (sel->name == NULL)
            error ("bad value (%s) for %s switch", ptr->string, ptr->name);
        }
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int sought;
      enum processor_type cpu;

      cpu = TARGET_CPU_DEFAULT;
      if (cpu == arm_none)
        {
#ifdef SUBTARGET_CPU_DEFAULT
          /* Use the subtarget default CPU if none was specified by
             configure.  */
          cpu = SUBTARGET_CPU_DEFAULT;
#endif
          /* Default to ARM6.  */
          if (cpu == arm_none)
            cpu = arm6;
        }
      sel = &all_cores[cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
         switch that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
        {
          sought |= (FL_THUMB | FL_MODE32);

          /* There are no ARM processors that support both APCS-26 and
             interworking.  Therefore we force FL_MODE26 to be removed
             from insn_flags here (if it was set), so that the search
             below will always be able to find a compatible processor.  */
          insn_flags &= ~FL_MODE26;
        }

      if (sought != 0 && ((sought & insn_flags) != sought))
        {
          /* Try to locate a CPU type that supports all of the abilities
             of the default CPU, plus the extra abilities requested by
             the user.  */
          for (sel = all_cores; sel->name != NULL; sel++)
            if ((sel->flags & sought) == (sought | insn_flags))
              break;

          if (sel->name == NULL)
            {
              unsigned current_bit_count = 0;
              const struct processors * best_fit = NULL;

              /* Ideally we would like to issue an error message here
                 saying that it was not possible to find a CPU compatible
                 with the default CPU, but which also supports the command
                 line options specified by the programmer, and so they
                 ought to use the -mcpu=<name> command line option to
                 override the default CPU type.

                 If we cannot find a cpu that has both the
                 characteristics of the default cpu and the given
                 command line options we scan the array again looking
                 for a best match.  */
              for (sel = all_cores; sel->name != NULL; sel++)
                if ((sel->flags & sought) == sought)
                  {
                    unsigned count;

                    count = bit_count (sel->flags & insn_flags);

                    if (count >= current_bit_count)
                      {
                        best_fit = sel;
                        current_bit_count = count;
                      }
                  }

              gcc_assert (best_fit);
              sel = best_fit;
            }

          insn_flags = sel->flags;
        }
      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      arm_default_cpu = (enum processor_type) (sel - all_cores);
      if (arm_tune == arm_none)
        arm_tune = arm_default_cpu;
    }

  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  if (optimize_size)
    targetm.rtx_costs = arm_size_rtx_costs;
  else
    targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM && !(insn_flags & FL_NOTM))
    error ("target CPU does not support ARM mode");

  if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
    target_flags |= MASK_INTERWORK;

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
  arm_arch_notm = (insn_flags & FL_NOTM) != 0;
  arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
  arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
        {
          if (streq (arm_all_abis[i].name, target_abi_name))
            {
              arm_abi = arm_all_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (arm_all_abis))
        error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
        target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
        target_fpu_name = "fpe3";
      else
        error ("invalid floating point emulation option: -mfpe=%s",
               target_fpe_name);
    }
  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
        {
          if (streq (all_fpus[i].name, target_fpu_name))
            {
              arm_fpu_arch = all_fpus[i].fpu;
              arm_fpu_tune = arm_fpu_arch;
              arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
              break;
            }
        }
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
        error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
        arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
        arm_fpu_arch = FPUTYPE_MAVERICK;
      else
        arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
        arm_fpu_tune = FPUTYPE_FPA;
      else
        arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
        {
          if (streq (all_float_abis[i].name, target_float_abi_name))
            {
              arm_float_abi = all_float_abis[i].abi_type;
              break;
            }
        }
      if (i == ARRAY_SIZE (all_float_abis))
        error ("invalid floating point abi: -mfloat-abi=%s",
               target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  /* FPA and iWMMXt are incompatible because the insn encodings overlap.
     VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
     will ever exist.  GCC makes no attempt to support this combination.  */
  if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
    sorry ("iWMMXt and hardware floating point");

  /* ??? iWMMXt insn patterns need auditing for Thumb-2.  */
  if (TARGET_THUMB2 && TARGET_IWMMXT)
    sorry ("Thumb-2 iWMMXt");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  if (target_thread_switch)
    {
      if (strcmp (target_thread_switch, "soft") == 0)
        target_thread_pointer = TP_SOFT;
      else if (strcmp (target_thread_switch, "auto") == 0)
        target_thread_pointer = TP_AUTO;
      else if (strcmp (target_thread_switch, "cp15") == 0)
        target_thread_pointer = TP_CP15;
      else
        error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
    }

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }

  if (TARGET_HARD_TP && TARGET_THUMB1)
    error ("can not use -mtp=cp15 with 16-bit Thumb");

  /* Override the default structure alignment for AAPCS ABI.  */
  if (TARGET_AAPCS_BASED)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
          || (ARM_DOUBLEWORD_ALIGN && size == 64))
        arm_structure_size_boundary = size;
      else
        warning (0, "structure size boundary can only be set to %s",
                 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with -msingle-pic-base");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  /* ??? We might want scheduling for thumb2.  */
  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
         are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
         2 cycles to load a constant, and the load scheduler may well
         reduce that to 1.  */
      if (arm_ld_sched)
        arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
         to achieve a good schedule, so it's faster to synthesize
         constants that can be done in two insns.  */
      if (arm_tune_xscale)
        arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
         that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
        max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}
/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};
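/* For example, a handler declared as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   has its "IRQ" argument looked up in the table above and is given
   the ARM_FT_ISR function type.  */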
/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char * arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}
/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}
/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}
1618 /* Return 1 if it is possible to return using a single instruction.
1619 If SIBLING is non-null, this is a test for a return before a sibling
1620 call. SIBLING is the call insn, so we can examine its register usage. */
1623 use_return_insn (int iscond, rtx sibling)
1625 int regno;
1626 unsigned int func_type;
1627 unsigned long saved_int_regs;
1628 unsigned HOST_WIDE_INT stack_adjust;
1629 arm_stack_offsets *offsets;
1631 /* Never use a return instruction before reload has run. */
1632 if (!reload_completed)
1633 return 0;
1635 func_type = arm_current_func_type ();
1637 /* Naked, volatile and stack alignment functions need special
1638 consideration. */
1639 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1640 return 0;
1642 /* So do interrupt functions that use the frame pointer and Thumb
1643 interrupt functions. */
1644 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1645 return 0;
1647 offsets = arm_get_frame_offsets ();
1648 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1650 /* As do variadic functions. */
1651 if (current_function_pretend_args_size
1652 || cfun->machine->uses_anonymous_args
1653 /* Or if the function calls __builtin_eh_return () */
1654 || current_function_calls_eh_return
1655 /* Or if the function calls alloca */
1656 || current_function_calls_alloca
1657 /* Or if there is a stack adjustment. However, if the stack pointer
1658 is saved on the stack, we can use a pre-incrementing stack load. */
1659 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1660 return 0;
1662 saved_int_regs = arm_compute_save_reg_mask ();
1664 /* Unfortunately, the insn
1666 ldmib sp, {..., sp, ...}
1668 triggers a bug on most SA-110 based devices, such that the stack
1669 pointer won't be correctly restored if the instruction takes a
1670 page fault. We work around this problem by popping r3 along with
1671 the other registers, since that is never slower than executing
1672 another instruction.
1674 We test for !arm_arch5 here, because code for any architecture
1675 less than this could potentially be run on one of the buggy
1676 chips. */
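/* Illustration (register use is the scheme described above, not a
   literal dump of generated code): with a 4-byte gap between the
   saved registers and SP, popping the dead r3 absorbs that gap, so
   the epilogue can use a plain "ldmfd sp!, {r3, ..., pc}" and never
   needs SP in the register list of the load-multiple. */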
1677 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1679 /* Validate that r3 is a call-clobbered register (always true in
1680 the default abi) ... */
1681 if (!call_used_regs[3])
1682 return 0;
1684 /* ... that it isn't being used for a return value ... */
1685 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1686 return 0;
1688 /* ... or for a tail-call argument ... */
1689 if (sibling)
1691 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1693 if (find_regno_fusage (sibling, USE, 3))
1694 return 0;
1697 /* ... and that there are no call-saved registers in r0-r2
1698 (always true in the default ABI). */
1699 if (saved_int_regs & 0x7)
1700 return 0;
1703 /* Can't be done if interworking with Thumb, and any registers have been
1704 stacked. */
1705 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1706 return 0;
1708 /* On StrongARM, conditional returns are expensive if they aren't
1709 taken and multiple registers have been stacked. */
1710 if (iscond && arm_tune_strongarm)
1712 /* Conditional return when just the LR is stored is a simple
1713 conditional-load instruction; that's not expensive. */
1714 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1715 return 0;
1717 if (flag_pic
1718 && arm_pic_register != INVALID_REGNUM
1719 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1720 return 0;
1723 /* If there are saved registers but the LR isn't saved, then we need
1724 two instructions for the return. */
1725 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1726 return 0;
1728 /* Can't be done if any of the FPA regs are pushed,
1729 since this also requires an insn. */
1730 if (TARGET_HARD_FLOAT && TARGET_FPA)
1731 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1732 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1733 return 0;
1735 /* Likewise VFP regs. */
1736 if (TARGET_HARD_FLOAT && TARGET_VFP)
1737 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1739 return 0;
1741 if (TARGET_REALLY_IWMMXT)
1742 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1743 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1744 return 0;
1746 return 1;
1749 /* Return TRUE if int I is a valid immediate ARM constant. */
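/* An ARM data-processing immediate is an 8-bit value rotated right by
   an even amount. For example, 0xff, 0x00000ff0, 0xff000000 and
   0xf000000f (a rotation that wraps around bit 31) are all valid,
   while 0x101 needs nine significant bits and is not. Thumb-2, handled
   below, accepts an 8-bit value at any bit position, so 0x102 is
   valid there but not in ARM mode. */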
1752 const_ok_for_arm (HOST_WIDE_INT i)
1754 int lowbit;
1756 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1757 be all zero, or all one. */
1758 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1759 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1760 != ((~(unsigned HOST_WIDE_INT) 0)
1761 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1762 return FALSE;
1764 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1766 /* Fast return for 0 and small values. We must do this for zero, since
1767 the code below can't handle that one case. */
1768 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1769 return TRUE;
1771 /* Get the number of trailing zeros. */
1772 lowbit = ffs((int) i) - 1;
1774 /* Only even shifts are allowed in ARM mode so round down to the
1775 nearest even number. */
1776 if (TARGET_ARM)
1777 lowbit &= ~1;
1779 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1780 return TRUE;
1782 if (TARGET_ARM)
1784 /* Allow rotated constants in ARM mode. */
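/* The three masks below describe 8-bit fields that wrap around
   bit 31, e.g. 0xc0000003 (0x0f rotated right by 2). */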
1785 if (lowbit <= 4
1786 && ((i & ~0xc000003f) == 0
1787 || (i & ~0xf000000f) == 0
1788 || (i & ~0xfc000003) == 0))
1789 return TRUE;
1791 else
1793 HOST_WIDE_INT v;
1795 /* Allow repeated pattern. */
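/* e.g. the byte 0x12 gives v == 0x00120012, so both 0x00120012 and
   0x12121212 (v | (v << 8)) are accepted as Thumb-2 replicated
   immediates. */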
1796 v = i & 0xff;
1797 v |= v << 16;
1798 if (i == v || i == (v | (v << 8)))
1799 return TRUE;
1802 return FALSE;
1805 /* Return true if I is a valid constant for the operation CODE. */
1806 static int
1807 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1809 if (const_ok_for_arm (i))
1810 return 1;
1812 switch (code)
1814 case PLUS:
1815 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1817 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1818 case XOR:
1819 case IOR:
1820 return 0;
1822 case AND:
1823 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1825 default:
1826 gcc_unreachable ();
1830 /* Emit a sequence of insns to handle a large constant.
1831 CODE is the code of the operation required, it can be any of SET, PLUS,
1832 IOR, AND, XOR, MINUS;
1833 MODE is the mode in which the operation is being performed;
1834 VAL is the integer to operate on;
1835 SOURCE is the other operand (a register, or a null-pointer for SET);
1836 SUBTARGETS means it is safe to create scratch registers if that will
1837 either produce a simpler sequence, or we will want to cse the values.
1838 Return value is the number of insns emitted. */
1840 /* ??? Tweak this for thumb2. */
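/* For example, a SET of 0x12345678 (with no movw available) costs
   four insns when synthesized inline, roughly:

	mov	rD, #0x12000000
	orr	rD, rD, #0x00340000
	orr	rD, rD, #0x00005600
	orr	rD, rD, #0x78

   (rD is illustrative.)  Before arm_reorg, a constant this expensive
   would normally be placed in a literal pool and loaded instead. */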
1842 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1843 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1845 rtx cond;
1847 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1848 cond = COND_EXEC_TEST (PATTERN (insn));
1849 else
1850 cond = NULL_RTX;
1852 if (subtargets || code == SET
1853 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1854 && REGNO (target) != REGNO (source)))
1856 /* After arm_reorg has been called, we can't fix up expensive
1857 constants by pushing them into memory so we must synthesize
1858 them in-line, regardless of the cost. This is only likely to
1859 be more costly on chips that have load delay slots and we are
1860 compiling without running the scheduler (so no splitting
1861 occurred before the final instruction emission).
1863 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1865 if (!after_arm_reorg
1866 && !cond
1867 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1868 1, 0)
1869 > arm_constant_limit + (code != SET)))
1871 if (code == SET)
1873 /* Currently SET is the only monadic value for CODE; all
1874 the rest are dyadic. */
1875 emit_set_insn (target, GEN_INT (val));
1876 return 1;
1878 else
1880 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1882 emit_set_insn (temp, GEN_INT (val));
1883 /* For MINUS, the value is subtracted from, since we never
1884 have subtraction of a constant. */
1885 if (code == MINUS)
1886 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1887 else
1888 emit_set_insn (target,
1889 gen_rtx_fmt_ee (code, mode, source, temp));
1890 return 2;
1895 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1899 /* Return the number of ARM instructions required to synthesize the given
1900 constant. */
1901 static int
1902 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1904 HOST_WIDE_INT temp1;
1905 int num_insns = 0;
1908 int end;
1910 if (i <= 0)
1911 i += 32;
1912 if (remainder & (3 << (i - 2)))
1914 end = i - 8;
1915 if (end < 0)
1916 end += 32;
1917 temp1 = remainder & ((0x0ff << end)
1918 | ((i < end) ? (0xff >> (32 - end)) : 0));
1919 remainder &= ~temp1;
1920 num_insns++;
1921 i -= 6;
1923 i -= 2;
1924 } while (remainder);
1925 return num_insns;
1928 /* Emit an instruction with the indicated PATTERN. If COND is
1929 non-NULL, conditionalize the execution of the instruction on COND
1930 being true. */
1932 static void
1933 emit_constant_insn (rtx cond, rtx pattern)
1935 if (cond)
1936 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1937 emit_insn (pattern);
1940 /* As above, but extra parameter GENERATE which, if clear, suppresses
1941 RTL generation. */
1942 /* ??? This needs more work for thumb2. */
1944 static int
1945 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1946 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1947 int generate)
1949 int can_invert = 0;
1950 int can_negate = 0;
1951 int can_negate_initial = 0;
1952 int can_shift = 0;
1953 int i;
1954 int num_bits_set = 0;
1955 int set_sign_bit_copies = 0;
1956 int clear_sign_bit_copies = 0;
1957 int clear_zero_bit_copies = 0;
1958 int set_zero_bit_copies = 0;
1959 int insns = 0;
1960 unsigned HOST_WIDE_INT temp1, temp2;
1961 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1963 /* Find out which operations are safe for a given CODE. Also do a quick
1964 check for degenerate cases; these can occur when DImode operations
1965 are split. */
1966 switch (code)
1968 case SET:
1969 can_invert = 1;
1970 can_shift = 1;
1971 can_negate = 1;
1972 break;
1974 case PLUS:
1975 can_negate = 1;
1976 can_negate_initial = 1;
1977 break;
1979 case IOR:
1980 if (remainder == 0xffffffff)
1982 if (generate)
1983 emit_constant_insn (cond,
1984 gen_rtx_SET (VOIDmode, target,
1985 GEN_INT (ARM_SIGN_EXTEND (val))));
1986 return 1;
1988 if (remainder == 0)
1990 if (reload_completed && rtx_equal_p (target, source))
1991 return 0;
1992 if (generate)
1993 emit_constant_insn (cond,
1994 gen_rtx_SET (VOIDmode, target, source));
1995 return 1;
1997 break;
1999 case AND:
2000 if (remainder == 0)
2002 if (generate)
2003 emit_constant_insn (cond,
2004 gen_rtx_SET (VOIDmode, target, const0_rtx));
2005 return 1;
2007 if (remainder == 0xffffffff)
2009 if (reload_completed && rtx_equal_p (target, source))
2010 return 0;
2011 if (generate)
2012 emit_constant_insn (cond,
2013 gen_rtx_SET (VOIDmode, target, source));
2014 return 1;
2016 can_invert = 1;
2017 break;
2019 case XOR:
2020 if (remainder == 0)
2022 if (reload_completed && rtx_equal_p (target, source))
2023 return 0;
2024 if (generate)
2025 emit_constant_insn (cond,
2026 gen_rtx_SET (VOIDmode, target, source));
2027 return 1;
2030 /* We don't know how to handle other cases yet. */
2031 gcc_assert (remainder == 0xffffffff);
2033 if (generate)
2034 emit_constant_insn (cond,
2035 gen_rtx_SET (VOIDmode, target,
2036 gen_rtx_NOT (mode, source)));
2037 return 1;
2039 case MINUS:
2040 /* We treat MINUS as (val - source), since (source - val) is always
2041 passed as (source + (-val)). */
2042 if (remainder == 0)
2044 if (generate)
2045 emit_constant_insn (cond,
2046 gen_rtx_SET (VOIDmode, target,
2047 gen_rtx_NEG (mode, source)));
2048 return 1;
2050 if (const_ok_for_arm (val))
2052 if (generate)
2053 emit_constant_insn (cond,
2054 gen_rtx_SET (VOIDmode, target,
2055 gen_rtx_MINUS (mode, GEN_INT (val),
2056 source)));
2057 return 1;
2059 can_negate = 1;
2061 break;
2063 default:
2064 gcc_unreachable ();
2067 /* If we can do it in one insn get out quickly. */
2068 if (const_ok_for_arm (val)
2069 || (can_negate_initial && const_ok_for_arm (-val))
2070 || (can_invert && const_ok_for_arm (~val)))
2072 if (generate)
2073 emit_constant_insn (cond,
2074 gen_rtx_SET (VOIDmode, target,
2075 (source
2076 ? gen_rtx_fmt_ee (code, mode, source,
2077 GEN_INT (val))
2078 : GEN_INT (val))));
2079 return 1;
2082 /* Calculate a few attributes that may be useful for specific
2083 optimizations. */
2084 for (i = 31; i >= 0; i--)
2086 if ((remainder & (1 << i)) == 0)
2087 clear_sign_bit_copies++;
2088 else
2089 break;
2092 for (i = 31; i >= 0; i--)
2094 if ((remainder & (1 << i)) != 0)
2095 set_sign_bit_copies++;
2096 else
2097 break;
2100 for (i = 0; i <= 31; i++)
2102 if ((remainder & (1 << i)) == 0)
2103 clear_zero_bit_copies++;
2104 else
2105 break;
2108 for (i = 0; i <= 31; i++)
2110 if ((remainder & (1 << i)) != 0)
2111 set_zero_bit_copies++;
2112 else
2113 break;
2116 switch (code)
2118 case SET:
2119 /* See if we can use movw. */
2120 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2122 if (generate)
2123 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2124 GEN_INT (val)));
2125 return 1;
2128 /* See if we can do this by sign-extending a constant that is known
2129 to be negative. This is a good way of doing it, since the shift
2130 may well merge into a subsequent insn. */
2131 if (set_sign_bit_copies > 1)
2133 if (const_ok_for_arm
2134 (temp1 = ARM_SIGN_EXTEND (remainder
2135 << (set_sign_bit_copies - 1))))
2137 if (generate)
2139 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2140 emit_constant_insn (cond,
2141 gen_rtx_SET (VOIDmode, new_src,
2142 GEN_INT (temp1)));
2143 emit_constant_insn (cond,
2144 gen_ashrsi3 (target, new_src,
2145 GEN_INT (set_sign_bit_copies - 1)));
2147 return 2;
2149 /* For an inverted constant, we will need to set the low bits,
2150 these will be shifted out of harm's way. */
2151 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2152 if (const_ok_for_arm (~temp1))
2154 if (generate)
2156 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2157 emit_constant_insn (cond,
2158 gen_rtx_SET (VOIDmode, new_src,
2159 GEN_INT (temp1)));
2160 emit_constant_insn (cond,
2161 gen_ashrsi3 (target, new_src,
2162 GEN_INT (set_sign_bit_copies - 1)));
2164 return 2;
2168 /* See if we can calculate the value as the difference between two
2169 valid immediates. */
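/* For example, remainder == 0x00ffffff gives temp1 == 0x01000000 and
   temp2 == 1, so the value is built in two insns, roughly:

	mov	rT, #0x01000000
	sub	rD, rT, #1

   (rT and rD are illustrative.) */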
2170 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2172 int topshift = clear_sign_bit_copies & ~1;
2174 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2175 & (0xff000000 >> topshift));
2177 /* If temp1 is zero, then that means the 9 most significant
2178 bits of remainder were 1 and we've caused it to overflow.
2179 When topshift is 0 we don't need to do anything since we
2180 can borrow from 'bit 32'. */
2181 if (temp1 == 0 && topshift != 0)
2182 temp1 = 0x80000000 >> (topshift - 1);
2184 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2186 if (const_ok_for_arm (temp2))
2188 if (generate)
2190 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2191 emit_constant_insn (cond,
2192 gen_rtx_SET (VOIDmode, new_src,
2193 GEN_INT (temp1)));
2194 emit_constant_insn (cond,
2195 gen_addsi3 (target, new_src,
2196 GEN_INT (-temp2)));
2199 return 2;
2203 /* See if we can generate this by setting the bottom (or the top)
2204 16 bits, and then shifting these into the other half of the
2205 word. We only look for the simplest cases, to do more would cost
2206 too much. Be careful, however, not to generate this when the
2207 alternative would take fewer insns. */
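/* e.g. 0x12341234: synthesize 0x1234 first (itself two insns without
   movw), then a single "orr rD, rS, rS, lsl #16" combines the two
   halves, for insns + 1 in total. */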
2208 if (val & 0xffff0000)
2210 temp1 = remainder & 0xffff0000;
2211 temp2 = remainder & 0x0000ffff;
2213 /* Overlaps outside this range are best done using other methods. */
2214 for (i = 9; i < 24; i++)
2216 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2217 && !const_ok_for_arm (temp2))
2219 rtx new_src = (subtargets
2220 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2221 : target);
2222 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2223 source, subtargets, generate);
2224 source = new_src;
2225 if (generate)
2226 emit_constant_insn
2227 (cond,
2228 gen_rtx_SET
2229 (VOIDmode, target,
2230 gen_rtx_IOR (mode,
2231 gen_rtx_ASHIFT (mode, source,
2232 GEN_INT (i)),
2233 source)));
2234 return insns + 1;
2238 /* Don't duplicate cases already considered. */
2239 for (i = 17; i < 24; i++)
2241 if (((temp1 | (temp1 >> i)) == remainder)
2242 && !const_ok_for_arm (temp1))
2244 rtx new_src = (subtargets
2245 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2246 : target);
2247 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2248 source, subtargets, generate);
2249 source = new_src;
2250 if (generate)
2251 emit_constant_insn
2252 (cond,
2253 gen_rtx_SET (VOIDmode, target,
2254 gen_rtx_IOR
2255 (mode,
2256 gen_rtx_LSHIFTRT (mode, source,
2257 GEN_INT (i)),
2258 source)));
2259 return insns + 1;
2263 break;
2265 case IOR:
2266 case XOR:
2267 /* If we have IOR or XOR, and the constant can be loaded in a
2268 single instruction, and we can find a temporary to put it in,
2269 then this can be done in two instructions instead of 3-4. */
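/* e.g. (IOR rS 0xfffffff0): ~0xfffffff0 == 0xf is a valid immediate,
   so movsi can load the constant with a single mvn, roughly:

	mvn	rT, #15
	orr	rD, rS, rT  */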
2270 if (subtargets
2271 /* TARGET can't be NULL if SUBTARGETS is 0 */
2272 || (reload_completed && !reg_mentioned_p (target, source)))
2274 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2276 if (generate)
2278 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2280 emit_constant_insn (cond,
2281 gen_rtx_SET (VOIDmode, sub,
2282 GEN_INT (val)));
2283 emit_constant_insn (cond,
2284 gen_rtx_SET (VOIDmode, target,
2285 gen_rtx_fmt_ee (code, mode,
2286 source, sub)));
2288 return 2;
2292 if (code == XOR)
2293 break;
2295 if (set_sign_bit_copies > 8
2296 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2298 if (generate)
2300 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2301 rtx shift = GEN_INT (set_sign_bit_copies);
2303 emit_constant_insn
2304 (cond,
2305 gen_rtx_SET (VOIDmode, sub,
2306 gen_rtx_NOT (mode,
2307 gen_rtx_ASHIFT (mode,
2308 source,
2309 shift))));
2310 emit_constant_insn
2311 (cond,
2312 gen_rtx_SET (VOIDmode, target,
2313 gen_rtx_NOT (mode,
2314 gen_rtx_LSHIFTRT (mode, sub,
2315 shift))));
2317 return 2;
2320 if (set_zero_bit_copies > 8
2321 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2323 if (generate)
2325 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2326 rtx shift = GEN_INT (set_zero_bit_copies);
2328 emit_constant_insn
2329 (cond,
2330 gen_rtx_SET (VOIDmode, sub,
2331 gen_rtx_NOT (mode,
2332 gen_rtx_LSHIFTRT (mode,
2333 source,
2334 shift))));
2335 emit_constant_insn
2336 (cond,
2337 gen_rtx_SET (VOIDmode, target,
2338 gen_rtx_NOT (mode,
2339 gen_rtx_ASHIFT (mode, sub,
2340 shift))));
2342 return 2;
2345 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2347 if (generate)
2349 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2350 emit_constant_insn (cond,
2351 gen_rtx_SET (VOIDmode, sub,
2352 gen_rtx_NOT (mode, source)));
2353 source = sub;
2354 if (subtargets)
2355 sub = gen_reg_rtx (mode);
2356 emit_constant_insn (cond,
2357 gen_rtx_SET (VOIDmode, sub,
2358 gen_rtx_AND (mode, source,
2359 GEN_INT (temp1))));
2360 emit_constant_insn (cond,
2361 gen_rtx_SET (VOIDmode, target,
2362 gen_rtx_NOT (mode, sub)));
2364 return 3;
2366 break;
2368 case AND:
2369 /* See if two shifts will do 2 or more insn's worth of work. */
2370 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2372 HOST_WIDE_INT shift_mask = ((0xffffffff
2373 << (32 - clear_sign_bit_copies))
2374 & 0xffffffff);
2376 if ((remainder | shift_mask) != 0xffffffff)
2378 if (generate)
2380 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2381 insns = arm_gen_constant (AND, mode, cond,
2382 remainder | shift_mask,
2383 new_src, source, subtargets, 1);
2384 source = new_src;
2386 else
2388 rtx targ = subtargets ? NULL_RTX : target;
2389 insns = arm_gen_constant (AND, mode, cond,
2390 remainder | shift_mask,
2391 targ, source, subtargets, 0);
2395 if (generate)
2397 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2398 rtx shift = GEN_INT (clear_sign_bit_copies);
2400 emit_insn (gen_ashlsi3 (new_src, source, shift));
2401 emit_insn (gen_lshrsi3 (target, new_src, shift));
2404 return insns + 2;
2407 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2409 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2411 if ((remainder | shift_mask) != 0xffffffff)
2413 if (generate)
2415 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2417 insns = arm_gen_constant (AND, mode, cond,
2418 remainder | shift_mask,
2419 new_src, source, subtargets, 1);
2420 source = new_src;
2422 else
2424 rtx targ = subtargets ? NULL_RTX : target;
2426 insns = arm_gen_constant (AND, mode, cond,
2427 remainder | shift_mask,
2428 targ, source, subtargets, 0);
2432 if (generate)
2434 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2435 rtx shift = GEN_INT (clear_zero_bit_copies);
2437 emit_insn (gen_lshrsi3 (new_src, source, shift));
2438 emit_insn (gen_ashlsi3 (target, new_src, shift));
2441 return insns + 2;
2444 break;
2446 default:
2447 break;
2450 for (i = 0; i < 32; i++)
2451 if (remainder & (1 << i))
2452 num_bits_set++;
2454 if (code == AND || (can_invert && num_bits_set > 16))
2455 remainder = (~remainder) & 0xffffffff;
2456 else if (code == PLUS && num_bits_set > 16)
2457 remainder = (-remainder) & 0xffffffff;
2458 else
2460 can_invert = 0;
2461 can_negate = 0;
2464 /* Now try and find a way of doing the job in either two or three
2465 instructions.
2466 We start by looking for the largest block of zeros that are aligned on
2467 a 2-bit boundary, we then fill up the temps, wrapping around to the
2468 top of the word when we drop off the bottom.
2469 In the worst case this code should produce no more than four insns.
2470 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2471 best place to start. */
2473 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2474 the same. */
2476 int best_start = 0;
2477 if (!TARGET_THUMB2)
2479 int best_consecutive_zeros = 0;
2481 for (i = 0; i < 32; i += 2)
2483 int consecutive_zeros = 0;
2485 if (!(remainder & (3 << i)))
2487 while ((i < 32) && !(remainder & (3 << i)))
2489 consecutive_zeros += 2;
2490 i += 2;
2492 if (consecutive_zeros > best_consecutive_zeros)
2494 best_consecutive_zeros = consecutive_zeros;
2495 best_start = i - consecutive_zeros;
2497 i -= 2;
2501 /* So long as it won't require any more insns to do so, it's
2502 desirable to emit a small constant (in bits 0...9) in the last
2503 insn. This way there is more chance that it can be combined with
2504 a later addressing insn to form a pre-indexed load or store
2505 operation. Consider:
2507 *((volatile int *)0xe0000100) = 1;
2508 *((volatile int *)0xe0000110) = 2;
2510 We want this to wind up as:
2512 mov rA, #0xe0000000
2513 mov rB, #1
2514 str rB, [rA, #0x100]
2515 mov rB, #2
2516 str rB, [rA, #0x110]
2518 rather than having to synthesize both large constants from scratch.
2520 Therefore, we calculate how many insns would be required to emit
2521 the constant starting from `best_start', and also starting from
2522 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2523 yield a shorter sequence, we may as well use zero. */
2524 if (best_start != 0
2525 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2526 && (count_insns_for_constant (remainder, 0) <=
2527 count_insns_for_constant (remainder, best_start)))
2528 best_start = 0;
2531 /* Now start emitting the insns. */
2532 i = best_start;
2535 int end;
2537 if (i <= 0)
2538 i += 32;
2539 if (remainder & (3 << (i - 2)))
2541 end = i - 8;
2542 if (end < 0)
2543 end += 32;
2544 temp1 = remainder & ((0x0ff << end)
2545 | ((i < end) ? (0xff >> (32 - end)) : 0));
2546 remainder &= ~temp1;
2548 if (generate)
2550 rtx new_src, temp1_rtx;
2552 if (code == SET || code == MINUS)
2554 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2555 if (can_invert && code != MINUS)
2556 temp1 = ~temp1;
2558 else
2560 if (remainder && subtargets)
2561 new_src = gen_reg_rtx (mode);
2562 else
2563 new_src = target;
2564 if (can_invert)
2565 temp1 = ~temp1;
2566 else if (can_negate)
2567 temp1 = -temp1;
2570 temp1 = trunc_int_for_mode (temp1, mode);
2571 temp1_rtx = GEN_INT (temp1);
2573 if (code == SET)
2575 else if (code == MINUS)
2576 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2577 else
2578 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2580 emit_constant_insn (cond,
2581 gen_rtx_SET (VOIDmode, new_src,
2582 temp1_rtx));
2583 source = new_src;
2586 if (code == SET)
2588 can_invert = 0;
2589 code = PLUS;
2591 else if (code == MINUS)
2592 code = PLUS;
2594 insns++;
2595 if (TARGET_ARM)
2596 i -= 6;
2597 else
2598 i -= 7;
2600 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary
2601 shifts. */
2602 if (TARGET_ARM)
2603 i -= 2;
2604 else
2605 i--;
2607 while (remainder);
2610 return insns;
2613 /* Canonicalize a comparison so that we are more likely to recognize it.
2614 This can be done for a few constant compares, where we can make the
2615 immediate value easier to load. */
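/* For example, (GE x #0x101) cannot use 0x101 directly, since it is
   not a valid immediate, but the equivalent (GT x #0x100) needs only
   a single cmp against #0x100. */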
2617 enum rtx_code
2618 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2619 rtx * op1)
2621 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2622 unsigned HOST_WIDE_INT maxval;
2623 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2625 switch (code)
2627 case EQ:
2628 case NE:
2629 return code;
2631 case GT:
2632 case LE:
2633 if (i != maxval
2634 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2636 *op1 = GEN_INT (i + 1);
2637 return code == GT ? GE : LT;
2639 break;
2641 case GE:
2642 case LT:
2643 if (i != ~maxval
2644 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2646 *op1 = GEN_INT (i - 1);
2647 return code == GE ? GT : LE;
2649 break;
2651 case GTU:
2652 case LEU:
2653 if (i != ~((unsigned HOST_WIDE_INT) 0)
2654 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2656 *op1 = GEN_INT (i + 1);
2657 return code == GTU ? GEU : LTU;
2659 break;
2661 case GEU:
2662 case LTU:
2663 if (i != 0
2664 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2666 *op1 = GEN_INT (i - 1);
2667 return code == GEU ? GTU : LEU;
2669 break;
2671 default:
2672 gcc_unreachable ();
2675 return code;
2679 /* Define how to find the value returned by a function. */
2682 arm_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
2684 enum machine_mode mode;
2685 int unsignedp ATTRIBUTE_UNUSED;
2686 rtx r ATTRIBUTE_UNUSED;
2688 mode = TYPE_MODE (type);
2689 /* Promote integer types. */
2690 if (INTEGRAL_TYPE_P (type))
2691 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2693 /* Promote small structs returned in a register to full-word size
2694 for big-endian AAPCS. */
2695 if (arm_return_in_msb (type))
2697 HOST_WIDE_INT size = int_size_in_bytes (type);
2698 if (size % UNITS_PER_WORD != 0)
2700 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2701 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2705 return LIBCALL_VALUE(mode);
2708 /* Determine the amount of memory needed to store the possible return
2709 registers of an untyped call. */
2711 arm_apply_result_size (void)
2713 int size = 16;
2715 if (TARGET_ARM)
2717 if (TARGET_HARD_FLOAT_ABI)
2719 if (TARGET_FPA)
2720 size += 12;
2721 if (TARGET_MAVERICK)
2722 size += 8;
2724 if (TARGET_IWMMXT_ABI)
2725 size += 8;
2728 return size;
2731 /* Decide whether a type should be returned in memory (true)
2732 or in a register (false). This is called by the macro
2733 RETURN_IN_MEMORY. */
2735 arm_return_in_memory (const_tree type)
2737 HOST_WIDE_INT size;
2739 size = int_size_in_bytes (type);
2741 /* Vector values should be returned using ARM registers, not memory (unless
2742 they're over 16 bytes, which will break since we only have four
2743 call-clobbered registers to play with). */
2744 if (TREE_CODE (type) == VECTOR_TYPE)
2745 return (size < 0 || size > (4 * UNITS_PER_WORD));
2747 if (!AGGREGATE_TYPE_P (type)
2748 && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2749 /* All simple types are returned in registers.
2750 For AAPCS, complex types are treated the same as aggregates. */
2751 return 0;
2753 if (arm_abi != ARM_ABI_APCS)
2755 /* ATPCS and later return aggregate types in memory only if they are
2756 larger than a word (or are variable size). */
2757 return (size < 0 || size > UNITS_PER_WORD);
2760 /* For the arm-wince targets we choose to be compatible with Microsoft's
2761 ARM and Thumb compilers, which always return aggregates in memory. */
2762 #ifndef ARM_WINCE
2763 /* All structures/unions bigger than one word are returned in memory.
2764 Also catch the case where int_size_in_bytes returns -1. In this case
2765 the aggregate is either huge or of variable size, and in either case
2766 we will want to return it via memory and not in a register. */
2767 if (size < 0 || size > UNITS_PER_WORD)
2768 return 1;
2770 if (TREE_CODE (type) == RECORD_TYPE)
2772 tree field;
2774 /* For a struct the APCS says that we only return in a register
2775 if the type is 'integer like' and every addressable element
2776 has an offset of zero. For practical purposes this means
2777 that the structure can have at most one non bit-field element
2778 and that this element must be the first one in the structure. */
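/* e.g. under the APCS, "struct { int i; }" comes back in r0, while
   "struct { float f; }" (float first field) or "struct { int i;
   int j; }" (larger than a word) must be returned in memory. */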
2780 /* Find the first field, ignoring non FIELD_DECL things which will
2781 have been created by C++. */
2782 for (field = TYPE_FIELDS (type);
2783 field && TREE_CODE (field) != FIELD_DECL;
2784 field = TREE_CHAIN (field))
2785 continue;
2787 if (field == NULL)
2788 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2790 /* Check that the first field is valid for returning in a register. */
2792 /* ... Floats are not allowed */
2793 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2794 return 1;
2796 /* ... Aggregates that are not themselves valid for returning in
2797 a register are not allowed. */
2798 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2799 return 1;
2801 /* Now check the remaining fields, if any. Only bitfields are allowed,
2802 since they are not addressable. */
2803 for (field = TREE_CHAIN (field);
2804 field;
2805 field = TREE_CHAIN (field))
2807 if (TREE_CODE (field) != FIELD_DECL)
2808 continue;
2810 if (!DECL_BIT_FIELD_TYPE (field))
2811 return 1;
2814 return 0;
2817 if (TREE_CODE (type) == UNION_TYPE)
2819 tree field;
2821 /* Unions can be returned in registers if every element is
2822 integral, or can be returned in an integer register. */
2823 for (field = TYPE_FIELDS (type);
2824 field;
2825 field = TREE_CHAIN (field))
2827 if (TREE_CODE (field) != FIELD_DECL)
2828 continue;
2830 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2831 return 1;
2833 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2834 return 1;
2837 return 0;
2839 #endif /* not ARM_WINCE */
2841 /* Return all other types in memory. */
2842 return 1;
2845 /* Indicate whether or not words of a double are in big-endian order. */
2848 arm_float_words_big_endian (void)
2850 if (TARGET_MAVERICK)
2851 return 0;
2853 /* For FPA, float words are always big-endian. For VFP, floats words
2854 follow the memory system mode. */
2856 if (TARGET_FPA)
2858 return 1;
2861 if (TARGET_VFP)
2862 return (TARGET_BIG_END ? 1 : 0);
2864 return 1;
2867 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2868 for a call to a function whose data type is FNTYPE.
2869 For a library call, FNTYPE is NULL. */
2870 void
2871 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2872 rtx libname ATTRIBUTE_UNUSED,
2873 tree fndecl ATTRIBUTE_UNUSED)
2875 /* On the ARM, the offset starts at 0. */
2876 pcum->nregs = 0;
2877 pcum->iwmmxt_nregs = 0;
2878 pcum->can_split = true;
2880 /* Varargs vectors are treated the same as long long.
2881 named_count avoids having to change the way arm handles 'named'. */
2882 pcum->named_count = 0;
2883 pcum->nargs = 0;
2885 if (TARGET_REALLY_IWMMXT && fntype)
2887 tree fn_arg;
2889 for (fn_arg = TYPE_ARG_TYPES (fntype);
2890 fn_arg;
2891 fn_arg = TREE_CHAIN (fn_arg))
2892 pcum->named_count += 1;
2894 if (! pcum->named_count)
2895 pcum->named_count = INT_MAX;
2900 /* Return true if mode/type need doubleword alignment. */
2901 bool
2902 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2904 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2905 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2909 /* Determine where to put an argument to a function.
2910 Value is zero to push the argument on the stack,
2911 or a hard register in which to store the argument.
2913 MODE is the argument's machine mode.
2914 TYPE is the data type of the argument (as a tree).
2915 This is null for libcalls where that information may
2916 not be available.
2917 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2918 the preceding args and about the function being called.
2919 NAMED is nonzero if this argument is a named parameter
2920 (otherwise it is an extra parameter matching an ellipsis). */
2923 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2924 tree type, int named)
2926 int nregs;
2928 /* Varargs vectors are treated the same as long long.
2929 named_count avoids having to change the way arm handles 'named'. */
2930 if (TARGET_IWMMXT_ABI
2931 && arm_vector_mode_supported_p (mode)
2932 && pcum->named_count > pcum->nargs + 1)
2934 if (pcum->iwmmxt_nregs <= 9)
2935 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2936 else
2938 pcum->can_split = false;
2939 return NULL_RTX;
2943 /* Put doubleword aligned quantities in even register pairs. */
2944 if (pcum->nregs & 1
2945 && ARM_DOUBLEWORD_ALIGN
2946 && arm_needs_doubleword_align (mode, type))
2947 pcum->nregs++;
2949 if (mode == VOIDmode)
2950 /* Pick an arbitrary value for operand 2 of the call insn. */
2951 return const0_rtx;
2953 /* Only allow splitting an arg between regs and memory if all preceding
2954 args were allocated to regs. For args passed by reference we only count
2955 the reference pointer. */
2956 if (pcum->can_split)
2957 nregs = 1;
2958 else
2959 nregs = ARM_NUM_REGS2 (mode, type);
2961 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2962 return NULL_RTX;
2964 return gen_rtx_REG (mode, pcum->nregs);
2967 static int
2968 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2969 tree type, bool named ATTRIBUTE_UNUSED)
2971 int nregs = pcum->nregs;
2973 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2974 return 0;
2976 if (NUM_ARG_REGS > nregs
2977 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2978 && pcum->can_split)
2979 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2981 return 0;
2984 /* Variable sized types are passed by reference. This is a GCC
2985 extension to the ARM ABI. */
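/* e.g. a parameter of variable-length array type: its TYPE_SIZE is
   not an INTEGER_CST, so a pointer to the object is passed instead. */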
2987 static bool
2988 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2989 enum machine_mode mode ATTRIBUTE_UNUSED,
2990 const_tree type, bool named ATTRIBUTE_UNUSED)
2992 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2995 /* Encode the current state of the #pragma [no_]long_calls. */
2996 typedef enum
2998 OFF, /* No #pragma [no_]long_calls is in effect. */
2999 LONG, /* #pragma long_calls is in effect. */
3000 SHORT /* #pragma no_long_calls is in effect. */
3001 } arm_pragma_enum;
3003 static arm_pragma_enum arm_pragma_long_calls = OFF;
3005 void
3006 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3008 arm_pragma_long_calls = LONG;
3011 void
3012 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3014 arm_pragma_long_calls = SHORT;
3017 void
3018 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3020 arm_pragma_long_calls = OFF;
3023 /* Table of machine attributes. */
3024 const struct attribute_spec arm_attribute_table[] =
3026 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3027 /* Function calls made to this symbol must be done indirectly, because
3028 it may lie outside of the 26-bit addressing range of a normal function
3029 call. */
3030 { "long_call", 0, 0, false, true, true, NULL },
3031 /* Whereas these functions are always known to reside within the 26-bit
3032 addressing range. */
3033 { "short_call", 0, 0, false, true, true, NULL },
3034 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3035 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3036 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3037 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3038 #ifdef ARM_PE
3039 /* ARM/PE has three new attributes:
3040 interfacearm - ?
3041 dllexport - for exporting a function/variable that will live in a dll
3042 dllimport - for importing a function/variable from a dll
3044 Microsoft allows multiple declspecs in one __declspec, separating
3045 them with spaces. We do NOT support this. Instead, use __declspec
3046 multiple times.
3048 { "dllimport", 0, 0, true, false, false, NULL },
3049 { "dllexport", 0, 0, true, false, false, NULL },
3050 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3051 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3052 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3053 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3054 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3055 #endif
3056 { NULL, 0, 0, false, false, false, NULL }
3059 /* Handle an attribute requiring a FUNCTION_DECL;
3060 arguments as in struct attribute_spec.handler. */
3061 static tree
3062 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3063 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3065 if (TREE_CODE (*node) != FUNCTION_DECL)
3067 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3068 IDENTIFIER_POINTER (name));
3069 *no_add_attrs = true;
3072 return NULL_TREE;
3075 /* Handle an "interrupt" or "isr" attribute;
3076 arguments as in struct attribute_spec.handler. */
3077 static tree
3078 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3079 bool *no_add_attrs)
3081 if (DECL_P (*node))
3083 if (TREE_CODE (*node) != FUNCTION_DECL)
3085 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3086 IDENTIFIER_POINTER (name));
3087 *no_add_attrs = true;
3089 /* FIXME: the argument if any is checked for type attributes;
3090 should it be checked for decl ones? */
3092 else
3094 if (TREE_CODE (*node) == FUNCTION_TYPE
3095 || TREE_CODE (*node) == METHOD_TYPE)
3097 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3099 warning (OPT_Wattributes, "%qs attribute ignored",
3100 IDENTIFIER_POINTER (name));
3101 *no_add_attrs = true;
3104 else if (TREE_CODE (*node) == POINTER_TYPE
3105 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3106 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3107 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3109 *node = build_variant_type_copy (*node);
3110 TREE_TYPE (*node) = build_type_attribute_variant
3111 (TREE_TYPE (*node),
3112 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3113 *no_add_attrs = true;
3115 else
3117 /* Possibly pass this attribute on from the type to a decl. */
3118 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3119 | (int) ATTR_FLAG_FUNCTION_NEXT
3120 | (int) ATTR_FLAG_ARRAY_NEXT))
3122 *no_add_attrs = true;
3123 return tree_cons (name, args, NULL_TREE);
3125 else
3127 warning (OPT_Wattributes, "%qs attribute ignored",
3128 IDENTIFIER_POINTER (name));
3133 return NULL_TREE;
3136 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3137 /* Handle the "notshared" attribute. This attribute is another way of
3138 requesting hidden visibility. ARM's compiler supports
3139 "__declspec(notshared)"; we support the same thing via an
3140 attribute. */
3142 static tree
3143 arm_handle_notshared_attribute (tree *node,
3144 tree name ATTRIBUTE_UNUSED,
3145 tree args ATTRIBUTE_UNUSED,
3146 int flags ATTRIBUTE_UNUSED,
3147 bool *no_add_attrs)
3149 tree decl = TYPE_NAME (*node);
3151 if (decl)
3153 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3154 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3155 *no_add_attrs = false;
3157 return NULL_TREE;
3159 #endif
3161 /* Return 0 if the attributes for two types are incompatible, 1 if they
3162 are compatible, and 2 if they are nearly compatible (which causes a
3163 warning to be generated). */
3164 static int
3165 arm_comp_type_attributes (const_tree type1, const_tree type2)
3167 int l1, l2, s1, s2;
3169 /* Check for mismatch of non-default calling convention. */
3170 if (TREE_CODE (type1) != FUNCTION_TYPE)
3171 return 1;
3173 /* Check for mismatched call attributes. */
3174 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3175 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3176 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3177 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3179 /* Only bother to check if an attribute is defined. */
3180 if (l1 | l2 | s1 | s2)
3182 /* If one type has an attribute, the other must have the same attribute. */
3183 if ((l1 != l2) || (s1 != s2))
3184 return 0;
3186 /* Disallow mixed attributes. */
3187 if ((l1 & s2) || (l2 & s1))
3188 return 0;
3191 /* Check for mismatched ISR attribute. */
3192 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3193 if (! l1)
3194 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3195 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3196 if (! l2)
3197 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3198 if (l1 != l2)
3199 return 0;
3201 return 1;
3204 /* Assigns default attributes to newly defined type. This is used to
3205 set short_call/long_call attributes for function types of
3206 functions defined inside corresponding #pragma scopes. */
3207 static void
3208 arm_set_default_type_attributes (tree type)
3210 /* Add __attribute__ ((long_call)) to all functions, when
3211 inside #pragma long_calls or __attribute__ ((short_call)),
3212 when inside #pragma no_long_calls. */
3213 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3215 tree type_attr_list, attr_name;
3216 type_attr_list = TYPE_ATTRIBUTES (type);
3218 if (arm_pragma_long_calls == LONG)
3219 attr_name = get_identifier ("long_call");
3220 else if (arm_pragma_long_calls == SHORT)
3221 attr_name = get_identifier ("short_call");
3222 else
3223 return;
3225 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3226 TYPE_ATTRIBUTES (type) = type_attr_list;
3230 /* Return true if DECL is known to be linked into section SECTION. */
3232 static bool
3233 arm_function_in_section_p (tree decl, section *section)
3235 /* We can only be certain about functions defined in the same
3236 compilation unit. */
3237 if (!TREE_STATIC (decl))
3238 return false;
3240 /* Make sure that SYMBOL always binds to the definition in this
3241 compilation unit. */
3242 if (!targetm.binds_local_p (decl))
3243 return false;
3245 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3246 if (!DECL_SECTION_NAME (decl))
3248 /* Only cater for unit-at-a-time mode, where we know that the user
3249 cannot later specify a section for DECL. */
3250 if (!flag_unit_at_a_time)
3251 return false;
3253 /* Make sure that we will not create a unique section for DECL. */
3254 if (flag_function_sections || DECL_ONE_ONLY (decl))
3255 return false;
3258 return function_section (decl) == section;
3261 /* Return nonzero if a 32-bit "long_call" should be generated for
3262 a call from the current function to DECL. We generate a long_call
3263 if the function:
3265 a. has an __attribute__ ((long_call))
3266 or b. is within the scope of a #pragma long_calls
3267 or c. the -mlong-calls command line switch has been specified
3269 However we do not generate a long call if the function:
3271 d. has an __attribute__ ((short_call))
3272 or e. is inside the scope of a #pragma no_long_calls
3273 or f. is defined in the same section as the current function. */
3275 bool
3276 arm_is_long_call_p (tree decl)
3278 tree attrs;
3280 if (!decl)
3281 return TARGET_LONG_CALLS;
3283 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3284 if (lookup_attribute ("short_call", attrs))
3285 return false;
3287 /* For "f", be conservative, and only cater for cases in which the
3288 whole of the current function is placed in the same section. */
3289 if (!flag_reorder_blocks_and_partition
3290 && arm_function_in_section_p (decl, current_function_section ()))
3291 return false;
3293 if (lookup_attribute ("long_call", attrs))
3294 return true;
3296 return TARGET_LONG_CALLS;
3299 /* Return nonzero if it is ok to make a tail-call to DECL. */
3300 static bool
3301 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3303 unsigned long func_type;
3305 if (cfun->machine->sibcall_blocked)
3306 return false;
3308 /* Never tailcall something for which we have no decl, or if we
3309 are in Thumb mode. */
3310 if (decl == NULL || TARGET_THUMB)
3311 return false;
3313 /* The PIC register is live on entry to VxWorks PLT entries, so we
3314 must make the call before restoring the PIC register. */
3315 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3316 return false;
3318 /* Cannot tail-call to long calls, since these are out of range of
3319 a branch instruction. */
3320 if (arm_is_long_call_p (decl))
3321 return false;
3323 /* If we are interworking and the function is not declared static
3324 then we can't tail-call it unless we know that it exists in this
3325 compilation unit (since it might be a Thumb routine). */
3326 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3327 return false;
3329 func_type = arm_current_func_type ();
3330 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3331 if (IS_INTERRUPT (func_type))
3332 return false;
3334 /* Never tailcall if function may be called with a misaligned SP. */
3335 if (IS_STACKALIGN (func_type))
3336 return false;
3338 /* Everything else is ok. */
3339 return true;
3343 /* Addressing mode support functions. */
3345 /* Return nonzero if X is a legitimate immediate operand when compiling
3346 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3348 legitimate_pic_operand_p (rtx x)
3350 if (GET_CODE (x) == SYMBOL_REF
3351 || (GET_CODE (x) == CONST
3352 && GET_CODE (XEXP (x, 0)) == PLUS
3353 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3354 return 0;
3356 return 1;
3359 /* Record that the current function needs a PIC register. Initialize
3360 cfun->machine->pic_reg if we have not already done so. */
3362 static void
3363 require_pic_register (void)
3365 /* A lot of the logic here is made obscure by the fact that this
3366 routine gets called as part of the rtx cost estimation process.
3367 We don't want those calls to affect any assumptions about the real
3368 function; and further, we can't call entry_of_function() until we
3369 start the real expansion process. */
3370 if (!current_function_uses_pic_offset_table)
3372 gcc_assert (can_create_pseudo_p ());
3373 if (arm_pic_register != INVALID_REGNUM)
3375 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3377 /* Play games to avoid marking the function as needing pic
3378 if we are being called as part of the cost-estimation
3379 process. */
3380 if (current_ir_type () != IR_GIMPLE)
3381 current_function_uses_pic_offset_table = 1;
3383 else
3385 rtx seq;
3387 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3389 /* Play games to avoid marking the function as needing pic
3390 if we are being called as part of the cost-estimation
3391 process. */
3392 if (current_ir_type () != IR_GIMPLE)
3394 current_function_uses_pic_offset_table = 1;
3395 start_sequence ();
3397 arm_load_pic_register (0UL);
3399 seq = get_insns ();
3400 end_sequence ();
3401 emit_insn_after (seq, entry_of_function ());
3408 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3410 if (GET_CODE (orig) == SYMBOL_REF
3411 || GET_CODE (orig) == LABEL_REF)
3413 rtx pic_ref, address;
3414 rtx insn;
3415 int subregs = 0;
3417 /* If this function doesn't have a pic register, create one now. */
3418 require_pic_register ();
3420 if (reg == 0)
3422 gcc_assert (can_create_pseudo_p ());
3423 reg = gen_reg_rtx (Pmode);
3425 subregs = 1;
3428 if (subregs)
3429 address = gen_reg_rtx (Pmode);
3430 else
3431 address = reg;
3433 if (TARGET_ARM)
3434 emit_insn (gen_pic_load_addr_arm (address, orig));
3435 else if (TARGET_THUMB2)
3436 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3437 else /* TARGET_THUMB1 */
3438 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3440 /* VxWorks does not impose a fixed gap between segments; the run-time
3441 gap can be different from the object-file gap. We therefore can't
3442 use GOTOFF unless we are absolutely sure that the symbol is in the
3443 same segment as the GOT. Unfortunately, the flexibility of linker
3444 scripts means that we can't be sure of that in general, so assume
3445 that GOTOFF is never valid on VxWorks. */
3446 if ((GET_CODE (orig) == LABEL_REF
3447 || (GET_CODE (orig) == SYMBOL_REF
3448 && SYMBOL_REF_LOCAL_P (orig)))
3449 && NEED_GOT_RELOC
3450 && !TARGET_VXWORKS_RTP)
3451 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3452 else
3454 pic_ref = gen_const_mem (Pmode,
3455 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3456 address));
3459 insn = emit_move_insn (reg, pic_ref);
3461 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3462 by loop. */
3463 set_unique_reg_note (insn, REG_EQUAL, orig);
3465 return reg;
3467 else if (GET_CODE (orig) == CONST)
3469 rtx base, offset;
3471 if (GET_CODE (XEXP (orig, 0)) == PLUS
3472 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3473 return orig;
3475 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3476 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3477 return orig;
3479 if (reg == 0)
3481 gcc_assert (can_create_pseudo_p ());
3482 reg = gen_reg_rtx (Pmode);
3485 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3487 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3488 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3489 base == reg ? 0 : reg);
3491 if (GET_CODE (offset) == CONST_INT)
3493 /* The base register doesn't really matter, we only want to
3494 test the index for the appropriate mode. */
3495 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3497 gcc_assert (can_create_pseudo_p ());
3498 offset = force_reg (Pmode, offset);
3501 if (GET_CODE (offset) == CONST_INT)
3502 return plus_constant (base, INTVAL (offset));
3505 if (GET_MODE_SIZE (mode) > 4
3506 && (GET_MODE_CLASS (mode) == MODE_INT
3507 || TARGET_SOFT_FLOAT))
3509 emit_insn (gen_addsi3 (reg, base, offset));
3510 return reg;
3513 return gen_rtx_PLUS (Pmode, base, offset);
3516 return orig;
3520 /* Find a spare register to use during the prolog of a function. */
3522 static int
3523 thumb_find_work_register (unsigned long pushed_regs_mask)
3525 int reg;
3527 /* Check the argument registers first as these are call-used. The
3528 register allocation order means that sometimes r3 might be used
3529 but earlier argument registers might not, so check them all. */
3530 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3531 if (!df_regs_ever_live_p (reg))
3532 return reg;
3534 /* Before going on to check the call-saved registers we can try a couple
3535 more ways of deducing that r3 is available. The first is when we are
3536 pushing anonymous arguments onto the stack and we have less than 4
3537 registers worth of fixed arguments(*). In this case r3 will be part of
3538 the variable argument list and so we can be sure that it will be
3539 pushed right at the start of the function. Hence it will be available
3540 for the rest of the prologue.
3541 (*): i.e. current_function_pretend_args_size is greater than 0. */
3542 if (cfun->machine->uses_anonymous_args
3543 && current_function_pretend_args_size > 0)
3544 return LAST_ARG_REGNUM;
3546 /* The other case is when we have fixed arguments but less than 4 registers
3547 worth. In this case r3 might be used in the body of the function, but
3548 it is not being used to convey an argument into the function. In theory
3549 we could just check current_function_args_size to see how many bytes are
3550 being passed in argument registers, but it seems that it is unreliable.
3551 Sometimes it will have the value 0 when in fact arguments are being
3552 passed. (See testcase execute/20021111-1.c for an example). So we also
3553 check the args_info.nregs field as well. The problem with this field is
3554 that it makes no allowances for arguments that are passed to the
3555 function but which are not used. Hence we could miss an opportunity
3556 when a function has an unused argument in r3. But it is better to be
3557 safe than to be sorry. */
3558 if (! cfun->machine->uses_anonymous_args
3559 && current_function_args_size >= 0
3560 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3561 && cfun->args_info.nregs < 4)
3562 return LAST_ARG_REGNUM;
3564 /* Otherwise look for a call-saved register that is going to be pushed. */
3565 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3566 if (pushed_regs_mask & (1 << reg))
3567 return reg;
3569 if (TARGET_THUMB2)
3571 /* Thumb-2 can use high regs. */
3572 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3573 if (pushed_regs_mask & (1 << reg))
3574 return reg;
3576 /* Something went wrong - thumb_compute_save_reg_mask()
3577 should have arranged for a suitable register to be pushed. */
3578 gcc_unreachable ();
3581 static GTY(()) int pic_labelno;
3583 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3584 low register. */
3586 void
3587 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3589 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3590 rtx global_offset_table;
3592 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3593 return;
3595 gcc_assert (flag_pic);
3597 pic_reg = cfun->machine->pic_reg;
3598 if (TARGET_VXWORKS_RTP)
3600 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3601 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3602 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3604 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3606 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3607 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3609 else
3611 /* We use an UNSPEC rather than a LABEL_REF because this label
3612 never appears in the code stream. */
3614 labelno = GEN_INT (pic_labelno++);
3615 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3616 l1 = gen_rtx_CONST (VOIDmode, l1);
3618 global_offset_table
3619 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3620 /* On the ARM the PC register contains 'dot + 8' at the time of the
3621 addition, on the Thumb it is 'dot + 4'. */
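/* The emitted ARM sequence is roughly (labels illustrative):

	ldr	rPIC, .LCP
   .LPIC0:
	add	rPIC, pc, rPIC
	...
   .LCP:
	.word	_GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   where the +8 (+4 for Thumb) cancels out the PC read-ahead. */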
3622 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3623 if (GOT_PCREL)
3625 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3626 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3628 else
3629 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3631 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3632 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3634 if (TARGET_ARM)
3636 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3637 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3639 else if (TARGET_THUMB2)
3641 /* Thumb-2 only allows very limited access to the PC. Calculate the
3642 address in a temporary register. */
3643 if (arm_pic_register != INVALID_REGNUM)
3645 pic_tmp = gen_rtx_REG (SImode,
3646 thumb_find_work_register (saved_regs));
3648 else
3650 gcc_assert (can_create_pseudo_p ());
3651 pic_tmp = gen_reg_rtx (Pmode);
3654 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3655 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3656 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3658 else /* TARGET_THUMB1 */
3660 if (arm_pic_register != INVALID_REGNUM
3661 && REGNO (pic_reg) > LAST_LO_REGNUM)
3663 /* We will have pushed the pic register, so we should always be
3664 able to find a work register. */
3665 pic_tmp = gen_rtx_REG (SImode,
3666 thumb_find_work_register (saved_regs));
3667 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3668 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3670 else
3671 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3672 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3676 /* Need to emit this whether or not we obey regdecls,
3677 since setjmp/longjmp can cause life info to screw up. */
3678 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3682 /* Return nonzero if X is valid as an ARM state addressing register. */
3683 static int
3684 arm_address_register_rtx_p (rtx x, int strict_p)
3686 int regno;
3688 if (GET_CODE (x) != REG)
3689 return 0;
3691 regno = REGNO (x);
3693 if (strict_p)
3694 return ARM_REGNO_OK_FOR_BASE_P (regno);
3696 return (regno <= LAST_ARM_REGNUM
3697 || regno >= FIRST_PSEUDO_REGISTER
3698 || regno == FRAME_POINTER_REGNUM
3699 || regno == ARG_POINTER_REGNUM);
3702 /* Return TRUE if this rtx is the difference of a symbol and a label,
3703 and will reduce to a PC-relative relocation in the object file.
3704 Expressions like this can be left alone when generating PIC, rather
3705 than forced through the GOT. */
3706 static int
3707 pcrel_constant_p (rtx x)
3709 if (GET_CODE (x) == MINUS)
3710 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3712 return FALSE;
3715 /* Return nonzero if X is a valid ARM state address operand. */
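/* Accepted forms include a bare base register, pre/post
   increment/decrement such as [r0, #4]! or [r0], #-4, base plus a
   (possibly scaled) index such as [r0, r1] or [r0, r1, lsl #2], and,
   after reload, LABEL_REF-based minipool addresses. The register
   names here are illustrative. */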
3717 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3718 int strict_p)
3720 bool use_ldrd;
3721 enum rtx_code code = GET_CODE (x);
3723 if (arm_address_register_rtx_p (x, strict_p))
3724 return 1;
3726 use_ldrd = (TARGET_LDRD
3727 && (mode == DImode
3728 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3730 if (code == POST_INC || code == PRE_DEC
3731 || ((code == PRE_INC || code == POST_DEC)
3732 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3733 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3735 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3736 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3737 && GET_CODE (XEXP (x, 1)) == PLUS
3738 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3740 rtx addend = XEXP (XEXP (x, 1), 1);
3742 /* Don't allow ldrd post-increment by register, because it's hard
3743 to fix up invalid register choices.  */
3744 if (use_ldrd
3745 && GET_CODE (x) == POST_MODIFY
3746 && GET_CODE (addend) == REG)
3747 return 0;
3749 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3750 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3753 /* After reload constants split into minipools will have addresses
3754 from a LABEL_REF. */
3755 else if (reload_completed
3756 && (code == LABEL_REF
3757 || (code == CONST
3758 && GET_CODE (XEXP (x, 0)) == PLUS
3759 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3760 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3761 return 1;
3763 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3764 return 0;
3766 else if (code == PLUS)
3768 rtx xop0 = XEXP (x, 0);
3769 rtx xop1 = XEXP (x, 1);
3771 return ((arm_address_register_rtx_p (xop0, strict_p)
3772 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3773 || (arm_address_register_rtx_p (xop1, strict_p)
3774 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3777 #if 0
3778 /* Reload currently can't handle MINUS, so disable this for now */
3779 else if (GET_CODE (x) == MINUS)
3781 rtx xop0 = XEXP (x, 0);
3782 rtx xop1 = XEXP (x, 1);
3784 return (arm_address_register_rtx_p (xop0, strict_p)
3785 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3787 #endif
3789 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3790 && code == SYMBOL_REF
3791 && CONSTANT_POOL_ADDRESS_P (x)
3792 && ! (flag_pic
3793 && symbol_mentioned_p (get_pool_constant (x))
3794 && ! pcrel_constant_p (get_pool_constant (x))))
3795 return 1;
3797 return 0;
3800 /* Return nonzero if X is a valid Thumb-2 address operand. */
3802 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3804 bool use_ldrd;
3805 enum rtx_code code = GET_CODE (x);
3807 if (arm_address_register_rtx_p (x, strict_p))
3808 return 1;
3810 use_ldrd = (TARGET_LDRD
3811 && (mode == DImode
3812 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3814 if (code == POST_INC || code == PRE_DEC
3815 || ((code == PRE_INC || code == POST_DEC)
3816 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3817 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3819 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3820 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3821 && GET_CODE (XEXP (x, 1)) == PLUS
3822 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3824 /* Thumb-2 only has autoincrement by constant. */
3825 rtx addend = XEXP (XEXP (x, 1), 1);
3826 HOST_WIDE_INT offset;
3828 if (GET_CODE (addend) != CONST_INT)
3829 return 0;
3831 offset = INTVAL (addend);
3832 if (GET_MODE_SIZE (mode) <= 4)
3833 return (offset > -256 && offset < 256);
3835 return (use_ldrd && offset > -1024 && offset < 1024
3836 && (offset & 3) == 0);
3839 /* After reload constants split into minipools will have addresses
3840 from a LABEL_REF. */
3841 else if (reload_completed
3842 && (code == LABEL_REF
3843 || (code == CONST
3844 && GET_CODE (XEXP (x, 0)) == PLUS
3845 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3846 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3847 return 1;
3849 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3850 return 0;
3852 else if (code == PLUS)
3854 rtx xop0 = XEXP (x, 0);
3855 rtx xop1 = XEXP (x, 1);
3857 return ((arm_address_register_rtx_p (xop0, strict_p)
3858 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3859 || (arm_address_register_rtx_p (xop1, strict_p)
3860 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3863 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3864 && code == SYMBOL_REF
3865 && CONSTANT_POOL_ADDRESS_P (x)
3866 && ! (flag_pic
3867 && symbol_mentioned_p (get_pool_constant (x))
3868 && ! pcrel_constant_p (get_pool_constant (x))))
3869 return 1;
3871 return 0;
3874 /* Return nonzero if INDEX is valid for an address index operand in
3875 ARM state. */
3876 static int
3877 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3878 int strict_p)
3880 HOST_WIDE_INT range;
3881 enum rtx_code code = GET_CODE (index);
3883 /* Standard coprocessor addressing modes. */
3884 if (TARGET_HARD_FLOAT
3885 && (TARGET_FPA || TARGET_MAVERICK)
3886 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3887 || (TARGET_MAVERICK && mode == DImode)))
3888 return (code == CONST_INT && INTVAL (index) < 1024
3889 && INTVAL (index) > -1024
3890 && (INTVAL (index) & 3) == 0);
3892 if (TARGET_NEON
3893 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3894 return (code == CONST_INT
3895 && INTVAL (index) < 1016
3896 && INTVAL (index) > -1024
3897 && (INTVAL (index) & 3) == 0);
3899 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3900 return (code == CONST_INT
3901 && INTVAL (index) < 1024
3902 && INTVAL (index) > -1024
3903 && (INTVAL (index) & 3) == 0);
3905 if (arm_address_register_rtx_p (index, strict_p)
3906 && (GET_MODE_SIZE (mode) <= 4))
3907 return 1;
3909 if (mode == DImode || mode == DFmode)
3911 if (code == CONST_INT)
3913 HOST_WIDE_INT val = INTVAL (index);
3915 if (TARGET_LDRD)
3916 return val > -256 && val < 256;
3917 else
3918 return val > -4096 && val < 4092;
3921 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3924 if (GET_MODE_SIZE (mode) <= 4
3925 && ! (arm_arch4
3926 && (mode == HImode
3927 || (mode == QImode && outer == SIGN_EXTEND))))
3929 if (code == MULT)
3931 rtx xiop0 = XEXP (index, 0);
3932 rtx xiop1 = XEXP (index, 1);
3934 return ((arm_address_register_rtx_p (xiop0, strict_p)
3935 && power_of_two_operand (xiop1, SImode))
3936 || (arm_address_register_rtx_p (xiop1, strict_p)
3937 && power_of_two_operand (xiop0, SImode)));
3939 else if (code == LSHIFTRT || code == ASHIFTRT
3940 || code == ASHIFT || code == ROTATERT)
3942 rtx op = XEXP (index, 1);
3944 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3945 && GET_CODE (op) == CONST_INT
3946 && INTVAL (op) > 0
3947 && INTVAL (op) <= 31);
3951 /* For ARM v4 we may be doing a sign-extend operation during the
3952 load. */
3953 if (arm_arch4)
3955 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3956 range = 256;
3957 else
3958 range = 4096;
3960 else
3961 range = (mode == HImode) ? 4095 : 4096;
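  /* The comparison below is strict, so a range of 256 accepts offsets
     -255..255 (the 8-bit offset field of the ARMv4 ldrh/ldrsb forms),
     and a range of 4096 accepts -4095..4095 (the 12-bit ldr/ldrb
     field).  */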
3963 return (code == CONST_INT
3964 && INTVAL (index) < range
3965 && INTVAL (index) > -range);
3968 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
3969 index operand, i.e. 1, 2, 4 or 8.  */
3970 static bool
3971 thumb2_index_mul_operand (rtx op)
3973 HOST_WIDE_INT val;
3975 if (GET_CODE (op) != CONST_INT)
3976 return false;
3978 val = INTVAL (op);
3979 return (val == 1 || val == 2 || val == 4 || val == 8);
3982 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
3983 static int
3984 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
3986 enum rtx_code code = GET_CODE (index);
3988 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
3989 /* Standard coprocessor addressing modes. */
3990 if (TARGET_HARD_FLOAT
3991 && (TARGET_FPA || TARGET_MAVERICK)
3992 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3993 || (TARGET_MAVERICK && mode == DImode)))
3994 return (code == CONST_INT && INTVAL (index) < 1024
3995 && INTVAL (index) > -1024
3996 && (INTVAL (index) & 3) == 0);
3998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
4000 /* For DImode assume values will usually live in core regs
4001 and only allow LDRD addressing modes. */
4002 if (!TARGET_LDRD || mode != DImode)
4003 return (code == CONST_INT
4004 && INTVAL (index) < 1024
4005 && INTVAL (index) > -1024
4006 && (INTVAL (index) & 3) == 0);
4009 if (TARGET_NEON
4010 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4011 return (code == CONST_INT
4012 && INTVAL (index) < 1016
4013 && INTVAL (index) > -1024
4014 && (INTVAL (index) & 3) == 0);
4016 if (arm_address_register_rtx_p (index, strict_p)
4017 && (GET_MODE_SIZE (mode) <= 4))
4018 return 1;
4020 if (mode == DImode || mode == DFmode)
4022 HOST_WIDE_INT val = INTVAL (index);
4023 /* ??? Can we assume ldrd for thumb2? */
4024 /* Thumb-2 ldrd only has reg+const addressing modes. */
4025 if (code != CONST_INT)
4026 return 0;
4028 /* ldrd supports offsets of +-1020.
4029 However the ldr fallback does not. */
4030 return val > -256 && val < 256 && (val & 3) == 0;
4033 if (code == MULT)
4035 rtx xiop0 = XEXP (index, 0);
4036 rtx xiop1 = XEXP (index, 1);
4038 return ((arm_address_register_rtx_p (xiop0, strict_p)
4039 && thumb2_index_mul_operand (xiop1))
4040 || (arm_address_register_rtx_p (xiop1, strict_p)
4041 && thumb2_index_mul_operand (xiop0)));
4043 else if (code == ASHIFT)
4045 rtx op = XEXP (index, 1);
4047 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4048 && GET_CODE (op) == CONST_INT
4049 && INTVAL (op) > 0
4050 && INTVAL (op) <= 3);
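  /* Thumb-2 ldr/str accept a 12-bit positive or an 8-bit negative
     immediate offset, hence the asymmetric range tested below.  */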
4053 return (code == CONST_INT
4054 && INTVAL (index) < 4096
4055 && INTVAL (index) > -256);
4058 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4059 static int
4060 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4062 int regno;
4064 if (GET_CODE (x) != REG)
4065 return 0;
4067 regno = REGNO (x);
4069 if (strict_p)
4070 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4072 return (regno <= LAST_LO_REGNUM
4073 || regno > LAST_VIRTUAL_REGISTER
4074 || regno == FRAME_POINTER_REGNUM
4075 || (GET_MODE_SIZE (mode) >= 4
4076 && (regno == STACK_POINTER_REGNUM
4077 || regno >= FIRST_PSEUDO_REGISTER
4078 || x == hard_frame_pointer_rtx
4079 || x == arg_pointer_rtx)));
4082 /* Return nonzero if x is a legitimate index register. This is the case
4083 for any base register that can access a QImode object. */
4084 inline static int
4085 thumb1_index_register_rtx_p (rtx x, int strict_p)
4087 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4090 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4092 The AP may be eliminated to either the SP or the FP, so we use the
4093 least common denominator, i.e. SImode, and offsets from 0 to 64.
4095 ??? Verify whether the above is the right approach.
4097 ??? Also, the FP may be eliminated to the SP, so perhaps that
4098 needs special handling also.
4100 ??? Look at how the mips16 port solves this problem. It probably uses
4101 better ways to solve some of these problems.
4103 Although it is not incorrect, we don't accept QImode and HImode
4104 addresses based on the frame pointer or arg pointer until the
4105 reload pass starts. This is so that eliminating such addresses
4106 into stack based ones won't produce impossible code. */
4108 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4110 /* ??? Not clear if this is right. Experiment. */
4111 if (GET_MODE_SIZE (mode) < 4
4112 && !(reload_in_progress || reload_completed)
4113 && (reg_mentioned_p (frame_pointer_rtx, x)
4114 || reg_mentioned_p (arg_pointer_rtx, x)
4115 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4116 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4117 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4118 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4119 return 0;
4121 /* Accept any base register. SP only in SImode or larger. */
4122 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4123 return 1;
4125 /* This is PC relative data before arm_reorg runs. */
4126 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4127 && GET_CODE (x) == SYMBOL_REF
4128 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4129 return 1;
4131 /* This is PC relative data after arm_reorg runs. */
4132 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4133 && (GET_CODE (x) == LABEL_REF
4134 || (GET_CODE (x) == CONST
4135 && GET_CODE (XEXP (x, 0)) == PLUS
4136 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4137 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4138 return 1;
4140 /* Post-inc indexing only supported for SImode and larger. */
4141 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4142 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4143 return 1;
4145 else if (GET_CODE (x) == PLUS)
4147 /* REG+REG address can be any two index registers. */
4148 /* We disallow FRAME+REG addressing since we know that FRAME
4149 will be replaced with STACK, and SP relative addressing only
4150 permits SP+OFFSET. */
4151 if (GET_MODE_SIZE (mode) <= 4
4152 && XEXP (x, 0) != frame_pointer_rtx
4153 && XEXP (x, 1) != frame_pointer_rtx
4154 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4155 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4156 return 1;
4158 /* REG+const has a 5- to 7-bit offset for non-SP registers. */
4159 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4160 || XEXP (x, 0) == arg_pointer_rtx)
4161 && GET_CODE (XEXP (x, 1)) == CONST_INT
4162 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4163 return 1;
4165 /* REG+const has a 10-bit offset for SP, but only SImode and
4166 larger are supported. */
4167 /* ??? Should probably check for DI/DFmode overflow here
4168 just like GO_IF_LEGITIMATE_OFFSET does. */
4169 else if (GET_CODE (XEXP (x, 0)) == REG
4170 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4171 && GET_MODE_SIZE (mode) >= 4
4172 && GET_CODE (XEXP (x, 1)) == CONST_INT
4173 && INTVAL (XEXP (x, 1)) >= 0
4174 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4175 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4176 return 1;
4178 else if (GET_CODE (XEXP (x, 0)) == REG
4179 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4180 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4181 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4182 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4183 && GET_MODE_SIZE (mode) >= 4
4184 && GET_CODE (XEXP (x, 1)) == CONST_INT
4185 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4186 return 1;
4189 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4190 && GET_MODE_SIZE (mode) == 4
4191 && GET_CODE (x) == SYMBOL_REF
4192 && CONSTANT_POOL_ADDRESS_P (x)
4193 && ! (flag_pic
4194 && symbol_mentioned_p (get_pool_constant (x))
4195 && ! pcrel_constant_p (get_pool_constant (x))))
4196 return 1;
4198 return 0;
4201 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4202 instruction of mode MODE. */
4204 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4206 switch (GET_MODE_SIZE (mode))
4208 case 1:
4209 return val >= 0 && val < 32;
4211 case 2:
4212 return val >= 0 && val < 64 && (val & 1) == 0;
4214 default:
4215 return (val >= 0
4216 && (val + GET_MODE_SIZE (mode)) <= 128
4217 && (val & 3) == 0);
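/* For SImode, for example, the default case accepts the offsets
   0, 4, ..., 124 (any word-aligned VAL with VAL + 4 <= 128).  */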
4221 /* Build the SYMBOL_REF for __tls_get_addr. */
4223 static GTY(()) rtx tls_get_addr_libfunc;
4225 static rtx
4226 get_tls_get_addr (void)
4228 if (!tls_get_addr_libfunc)
4229 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4230 return tls_get_addr_libfunc;
4233 static rtx
4234 arm_load_tp (rtx target)
4236 if (!target)
4237 target = gen_reg_rtx (SImode);
4239 if (TARGET_HARD_TP)
4241 /* Can return in any reg. */
4242 emit_insn (gen_load_tp_hard (target));
4244 else
4246 /* Always returned in r0. Immediately copy the result into a pseudo,
4247 otherwise other uses of r0 (e.g. setting up function arguments) may
4248 clobber the value. */
4250 rtx tmp;
4252 emit_insn (gen_load_tp_soft ());
4254 tmp = gen_rtx_REG (SImode, 0);
4255 emit_move_insn (target, tmp);
4257 return target;
4260 static rtx
4261 load_tls_operand (rtx x, rtx reg)
4263 rtx tmp;
4265 if (reg == NULL_RTX)
4266 reg = gen_reg_rtx (SImode);
4268 tmp = gen_rtx_CONST (SImode, x);
4270 emit_move_insn (reg, tmp);
4272 return reg;
4275 static rtx
4276 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4278 rtx insns, label, labelno, sum;
4280 start_sequence ();
4282 labelno = GEN_INT (pic_labelno++);
4283 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4284 label = gen_rtx_CONST (VOIDmode, label);
4286 sum = gen_rtx_UNSPEC (Pmode,
4287 gen_rtvec (4, x, GEN_INT (reloc), label,
4288 GEN_INT (TARGET_ARM ? 8 : 4)),
4289 UNSPEC_TLS);
4290 reg = load_tls_operand (sum, reg);
4292 if (TARGET_ARM)
4293 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4294 else if (TARGET_THUMB2)
4296 rtx tmp;
4297 /* Thumb-2 only allows very limited access to the PC. Calculate
4298 the address in a temporary register. */
4299 tmp = gen_reg_rtx (SImode);
4300 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4301 emit_insn (gen_addsi3 (reg, reg, tmp));
4303 else /* TARGET_THUMB1 */
4304 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4306 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4307 Pmode, 1, reg, Pmode);
4309 insns = get_insns ();
4310 end_sequence ();
4312 return insns;
4316 legitimize_tls_address (rtx x, rtx reg)
4318 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4319 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
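  /* One case per TLS dialect: global- and local-dynamic both call
     __tls_get_addr; initial-exec loads the offset from the GOT and
     adds it to the thread pointer; local-exec adds a link-time
     constant to the thread pointer directly.  */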
4321 switch (model)
4323 case TLS_MODEL_GLOBAL_DYNAMIC:
4324 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4325 dest = gen_reg_rtx (Pmode);
4326 emit_libcall_block (insns, dest, ret, x);
4327 return dest;
4329 case TLS_MODEL_LOCAL_DYNAMIC:
4330 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4332 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4333 share the LDM result with other LD model accesses. */
4334 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4335 UNSPEC_TLS);
4336 dest = gen_reg_rtx (Pmode);
4337 emit_libcall_block (insns, dest, ret, eqv);
4339 /* Load the addend. */
4340 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4341 UNSPEC_TLS);
4342 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4343 return gen_rtx_PLUS (Pmode, dest, addend);
4345 case TLS_MODEL_INITIAL_EXEC:
4346 labelno = GEN_INT (pic_labelno++);
4347 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4348 label = gen_rtx_CONST (VOIDmode, label);
4349 sum = gen_rtx_UNSPEC (Pmode,
4350 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4351 GEN_INT (TARGET_ARM ? 8 : 4)),
4352 UNSPEC_TLS);
4353 reg = load_tls_operand (sum, reg);
4355 if (TARGET_ARM)
4356 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4357 else if (TARGET_THUMB2)
4359 rtx tmp;
4360 /* Thumb-2 only allows very limited access to the PC. Calculate
4361 the address in a temporary register. */
4362 tmp = gen_reg_rtx (SImode);
4363 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4364 emit_insn (gen_addsi3 (reg, reg, tmp));
4365 emit_move_insn (reg, gen_const_mem (SImode, reg));
4367 else
4369 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4370 emit_move_insn (reg, gen_const_mem (SImode, reg));
4373 tp = arm_load_tp (NULL_RTX);
4375 return gen_rtx_PLUS (Pmode, tp, reg);
4377 case TLS_MODEL_LOCAL_EXEC:
4378 tp = arm_load_tp (NULL_RTX);
4380 reg = gen_rtx_UNSPEC (Pmode,
4381 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4382 UNSPEC_TLS);
4383 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4385 return gen_rtx_PLUS (Pmode, tp, reg);
4387 default:
4388 abort ();
4392 /* Try machine-dependent ways of modifying an illegitimate address
4393 to be legitimate. If we find one, return the new, valid address. */
4395 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4397 if (arm_tls_symbol_p (x))
4398 return legitimize_tls_address (x, NULL_RTX);
4400 if (GET_CODE (x) == PLUS)
4402 rtx xop0 = XEXP (x, 0);
4403 rtx xop1 = XEXP (x, 1);
4405 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4406 xop0 = force_reg (SImode, xop0);
4408 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4409 xop1 = force_reg (SImode, xop1);
4411 if (ARM_BASE_REGISTER_RTX_P (xop0)
4412 && GET_CODE (xop1) == CONST_INT)
4414 HOST_WIDE_INT n, low_n;
4415 rtx base_reg, val;
4416 n = INTVAL (xop1);
4418 /* VFP addressing modes actually allow greater offsets, but for
4419 now we just stick with the lowest common denominator. */
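  /* For example, an offset of 23 (0x17) is rewritten as a base of
     +32 with a residual offset of -9.  */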
4420 if (mode == DImode
4421 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4423 low_n = n & 0x0f;
4424 n &= ~0x0f;
4425 if (low_n > 4)
4427 n += 16;
4428 low_n -= 16;
4431 else
4433 low_n = ((mode) == TImode ? 0
4434 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4435 n -= low_n;
4438 base_reg = gen_reg_rtx (SImode);
4439 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4440 emit_move_insn (base_reg, val);
4441 x = plus_constant (base_reg, low_n);
4443 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4444 x = gen_rtx_PLUS (SImode, xop0, xop1);
4447 /* XXX We don't allow MINUS any more -- see comment in
4448 arm_legitimate_address_p (). */
4449 else if (GET_CODE (x) == MINUS)
4451 rtx xop0 = XEXP (x, 0);
4452 rtx xop1 = XEXP (x, 1);
4454 if (CONSTANT_P (xop0))
4455 xop0 = force_reg (SImode, xop0);
4457 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4458 xop1 = force_reg (SImode, xop1);
4460 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4461 x = gen_rtx_MINUS (SImode, xop0, xop1);
4464 /* Make sure to take full advantage of the pre-indexed addressing mode
4465 with absolute addresses, which often allows the base register to be
4466 factored out across multiple adjacent memory references, and might
4467 even allow the minipool to be avoided entirely. */
4468 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4470 unsigned int bits;
4471 HOST_WIDE_INT mask, base, index;
4472 rtx base_reg;
4474 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
4475 use an 8-bit index.  So let's use a 12-bit index for SImode only and
4476 hope that arm_gen_constant will enable ldrb to use more bits. */
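  /* For example, an SImode access at the absolute address 0x12345
     splits into base 0x12000 plus index 0x345; the base register can
     then be shared by neighbouring accesses.  */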
4477 bits = (mode == SImode) ? 12 : 8;
4478 mask = (1 << bits) - 1;
4479 base = INTVAL (x) & ~mask;
4480 index = INTVAL (x) & mask;
4481 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4483 /* It'll most probably be more efficient to generate the base
4484 with more bits set and use a negative index instead. */
4485 base |= mask;
4486 index -= mask;
4488 base_reg = force_reg (SImode, GEN_INT (base));
4489 x = plus_constant (base_reg, index);
4492 if (flag_pic)
4494 /* We need to find and carefully transform any SYMBOL and LABEL
4495 references; so go back to the original address expression. */
4496 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4498 if (new_x != orig_x)
4499 x = new_x;
4502 return x;
4506 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4507 to be legitimate. If we find one, return the new, valid address. */
4509 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4511 if (arm_tls_symbol_p (x))
4512 return legitimize_tls_address (x, NULL_RTX);
4514 if (GET_CODE (x) == PLUS
4515 && GET_CODE (XEXP (x, 1)) == CONST_INT
4516 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4517 || INTVAL (XEXP (x, 1)) < 0))
4519 rtx xop0 = XEXP (x, 0);
4520 rtx xop1 = XEXP (x, 1);
4521 HOST_WIDE_INT offset = INTVAL (xop1);
4523 /* Try and fold the offset into a biasing of the base register and
4524 then offsetting that. Don't do this when optimizing for space
4525 since it can cause too many CSEs. */
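      /* For example, an HImode access at base + 300 is rewritten as
	 (base + 254) + 46, so the residual offset fits the 5-bit
	 scaled offset field.  */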
4526 if (optimize_size && offset >= 0
4527 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4529 HOST_WIDE_INT delta;
4531 if (offset >= 256)
4532 delta = offset - (256 - GET_MODE_SIZE (mode));
4533 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4534 delta = 31 * GET_MODE_SIZE (mode);
4535 else
4536 delta = offset & (~31 * GET_MODE_SIZE (mode));
4538 xop0 = force_operand (plus_constant (xop0, offset - delta),
4539 NULL_RTX);
4540 x = plus_constant (xop0, delta);
4542 else if (offset < 0 && offset > -256)
4543 /* Small negative offsets are best done with a subtract before the
4544 dereference; forcing these into a register normally takes two
4545 instructions. */
4546 x = force_operand (x, NULL_RTX);
4547 else
4549 /* For the remaining cases, force the constant into a register. */
4550 xop1 = force_reg (SImode, xop1);
4551 x = gen_rtx_PLUS (SImode, xop0, xop1);
4554 else if (GET_CODE (x) == PLUS
4555 && s_register_operand (XEXP (x, 1), SImode)
4556 && !s_register_operand (XEXP (x, 0), SImode))
4558 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4560 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4563 if (flag_pic)
4565 /* We need to find and carefully transform any SYMBOL and LABEL
4566 references; so go back to the original address expression. */
4567 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4569 if (new_x != orig_x)
4570 x = new_x;
4573 return x;
4577 thumb_legitimize_reload_address (rtx *x_p,
4578 enum machine_mode mode,
4579 int opnum, int type,
4580 int ind_levels ATTRIBUTE_UNUSED)
4582 rtx x = *x_p;
4584 if (GET_CODE (x) == PLUS
4585 && GET_MODE_SIZE (mode) < 4
4586 && REG_P (XEXP (x, 0))
4587 && XEXP (x, 0) == stack_pointer_rtx
4588 && GET_CODE (XEXP (x, 1)) == CONST_INT
4589 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4591 rtx orig_x = x;
4593 x = copy_rtx (x);
4594 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4595 Pmode, VOIDmode, 0, 0, opnum, type);
4596 return x;
4599 /* If both registers are hi-regs, then it's better to reload the
4600 entire expression rather than each register individually. That
4601 only requires one reload register rather than two. */
4602 if (GET_CODE (x) == PLUS
4603 && REG_P (XEXP (x, 0))
4604 && REG_P (XEXP (x, 1))
4605 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4606 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4608 rtx orig_x = x;
4610 x = copy_rtx (x);
4611 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4612 Pmode, VOIDmode, 0, 0, opnum, type);
4613 return x;
4616 return NULL;
4619 /* Test for various thread-local symbols. */
4621 /* Return TRUE if X is a thread-local symbol. */
4623 static bool
4624 arm_tls_symbol_p (rtx x)
4626 if (! TARGET_HAVE_TLS)
4627 return false;
4629 if (GET_CODE (x) != SYMBOL_REF)
4630 return false;
4632 return SYMBOL_REF_TLS_MODEL (x) != 0;
4635 /* Helper for arm_tls_referenced_p. */
4637 static int
4638 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4640 if (GET_CODE (*x) == SYMBOL_REF)
4641 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4643 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4644 TLS offsets, not real symbol references. */
4645 if (GET_CODE (*x) == UNSPEC
4646 && XINT (*x, 1) == UNSPEC_TLS)
4647 return -1;
4649 return 0;
4652 /* Return TRUE if X contains any TLS symbol references. */
4654 bool
4655 arm_tls_referenced_p (rtx x)
4657 if (! TARGET_HAVE_TLS)
4658 return false;
4660 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4663 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4665 bool
4666 arm_cannot_force_const_mem (rtx x)
4668 rtx base, offset;
4670 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4672 split_const (x, &base, &offset);
4673 if (GET_CODE (base) == SYMBOL_REF
4674 && !offset_within_block_p (base, INTVAL (offset)))
4675 return true;
4677 return arm_tls_referenced_p (x);
4680 #define REG_OR_SUBREG_REG(X) \
4681 (GET_CODE (X) == REG \
4682 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4684 #define REG_OR_SUBREG_RTX(X) \
4685 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4687 #ifndef COSTS_N_INSNS
4688 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4689 #endif
4690 static inline int
4691 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4693 enum machine_mode mode = GET_MODE (x);
4695 switch (code)
4697 case ASHIFT:
4698 case ASHIFTRT:
4699 case LSHIFTRT:
4700 case ROTATERT:
4701 case PLUS:
4702 case MINUS:
4703 case COMPARE:
4704 case NEG:
4705 case NOT:
4706 return COSTS_N_INSNS (1);
4708 case MULT:
4709 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4711 int cycles = 0;
4712 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
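	  /* Model the early-terminating multiply: one extra cycle for
	     every two significant bits of the constant.  */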
4714 while (i)
4716 i >>= 2;
4717 cycles++;
4719 return COSTS_N_INSNS (2) + cycles;
4721 return COSTS_N_INSNS (1) + 16;
4723 case SET:
4724 return (COSTS_N_INSNS (1)
4725 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4726 + (GET_CODE (SET_DEST (x)) == MEM)));
4728 case CONST_INT:
4729 if (outer == SET)
4731 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4732 return 0;
4733 if (thumb_shiftable_const (INTVAL (x)))
4734 return COSTS_N_INSNS (2);
4735 return COSTS_N_INSNS (3);
4737 else if ((outer == PLUS || outer == COMPARE)
4738 && INTVAL (x) < 256 && INTVAL (x) > -256)
4739 return 0;
4740 else if (outer == AND
4741 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4742 return COSTS_N_INSNS (1);
4743 else if (outer == ASHIFT || outer == ASHIFTRT
4744 || outer == LSHIFTRT)
4745 return 0;
4746 return COSTS_N_INSNS (2);
4748 case CONST:
4749 case CONST_DOUBLE:
4750 case LABEL_REF:
4751 case SYMBOL_REF:
4752 return COSTS_N_INSNS (3);
4754 case UDIV:
4755 case UMOD:
4756 case DIV:
4757 case MOD:
4758 return 100;
4760 case TRUNCATE:
4761 return 99;
4763 case AND:
4764 case XOR:
4765 case IOR:
4766 /* XXX guess. */
4767 return 8;
4769 case MEM:
4770 /* XXX another guess. */
4771 /* Memory costs quite a lot for the first word, but subsequent words
4772 load at the equivalent of a single insn each. */
4773 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4774 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4775 ? 4 : 0));
4777 case IF_THEN_ELSE:
4778 /* XXX a guess. */
4779 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4780 return 14;
4781 return 2;
4783 case ZERO_EXTEND:
4784 /* XXX still guessing. */
4785 switch (GET_MODE (XEXP (x, 0)))
4787 case QImode:
4788 return (1 + (mode == DImode ? 4 : 0)
4789 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4791 case HImode:
4792 return (4 + (mode == DImode ? 4 : 0)
4793 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4795 case SImode:
4796 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4798 default:
4799 return 99;
4802 default:
4803 return 99;
4808 /* Worker routine for arm_rtx_costs. */
4809 /* ??? This needs updating for thumb2. */
4810 static inline int
4811 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4813 enum machine_mode mode = GET_MODE (x);
4814 enum rtx_code subcode;
4815 int extra_cost;
4817 switch (code)
4819 case MEM:
4820 /* Memory costs quite a lot for the first word, but subsequent words
4821 load at the equivalent of a single insn each. */
4822 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4823 + (GET_CODE (x) == SYMBOL_REF
4824 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4826 case DIV:
4827 case MOD:
4828 case UDIV:
4829 case UMOD:
4830 return optimize_size ? COSTS_N_INSNS (2) : 100;
4832 case ROTATE:
4833 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4834 return 4;
4835 /* Fall through */
4836 case ROTATERT:
4837 if (mode != SImode)
4838 return 8;
4839 /* Fall through */
4840 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4841 if (mode == DImode)
4842 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4843 + ((GET_CODE (XEXP (x, 0)) == REG
4844 || (GET_CODE (XEXP (x, 0)) == SUBREG
4845 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4846 ? 0 : 8));
4847 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4848 || (GET_CODE (XEXP (x, 0)) == SUBREG
4849 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4850 ? 0 : 4)
4851 + ((GET_CODE (XEXP (x, 1)) == REG
4852 || (GET_CODE (XEXP (x, 1)) == SUBREG
4853 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4854 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4855 ? 0 : 4));
4857 case MINUS:
4858 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4860 extra_cost = rtx_cost (XEXP (x, 1), code);
4861 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4862 extra_cost += 4 * ARM_NUM_REGS (mode);
4863 return extra_cost;
4866 if (mode == DImode)
4867 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4868 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4869 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4870 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4871 ? 0 : 8));
4873 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4874 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4875 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4876 && arm_const_double_rtx (XEXP (x, 1))))
4877 ? 0 : 8)
4878 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4879 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4880 && arm_const_double_rtx (XEXP (x, 0))))
4881 ? 0 : 8));
4883 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4884 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4885 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4886 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4887 || subcode == ASHIFTRT || subcode == LSHIFTRT
4888 || subcode == ROTATE || subcode == ROTATERT
4889 || (subcode == MULT
4890 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4891 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4892 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4893 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4894 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4895 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4896 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4897 return 1;
4898 /* Fall through */
4900 case PLUS:
4901 if (GET_CODE (XEXP (x, 0)) == MULT)
4903 extra_cost = rtx_cost (XEXP (x, 0), code);
4904 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4905 extra_cost += 4 * ARM_NUM_REGS (mode);
4906 return extra_cost;
4909 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4910 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4911 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4912 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4913 && arm_const_double_rtx (XEXP (x, 1))))
4914 ? 0 : 8));
4916 /* Fall through */
4917 case AND: case XOR: case IOR:
4918 extra_cost = 0;
4920 /* Normally the frame registers will be split into reg+const during
4921 reload, so it is a bad idea to combine them with other instructions,
4922 since then they might not be moved outside of loops. As a compromise
4923 we allow integration with ops that have a constant as their second
4924 operand. */
4925 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4926 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4927 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4928 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4929 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4930 extra_cost = 4;
4932 if (mode == DImode)
4933 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4934 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4935 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4936 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4937 ? 0 : 8));
4939 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4940 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4941 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4942 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4943 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4944 ? 0 : 4));
4946 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4947 return (1 + extra_cost
4948 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4949 || subcode == LSHIFTRT || subcode == ASHIFTRT
4950 || subcode == ROTATE || subcode == ROTATERT
4951 || (subcode == MULT
4952 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4953 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4954 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4955 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4956 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4957 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4958 ? 0 : 4));
4960 return 8;
4962 case MULT:
4963 /* This should have been handled by the CPU specific routines. */
4964 gcc_unreachable ();
4966 case TRUNCATE:
4967 if (arm_arch3m && mode == SImode
4968 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4970 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4971 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4972 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4973 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4974 return 8;
4975 return 99;
4977 case NEG:
4978 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4979 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4980 /* Fall through */
4981 case NOT:
4982 if (mode == DImode)
4983 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4985 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4987 case IF_THEN_ELSE:
4988 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4989 return 14;
4990 return 2;
4992 case COMPARE:
4993 return 1;
4995 case ABS:
4996 return 4 + (mode == DImode ? 4 : 0);
4998 case SIGN_EXTEND:
4999 /* ??? value extensions are cheaper on armv6. */
5000 if (GET_MODE (XEXP (x, 0)) == QImode)
5001 return (4 + (mode == DImode ? 4 : 0)
5002 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5003 /* Fall through */
5004 case ZERO_EXTEND:
5005 switch (GET_MODE (XEXP (x, 0)))
5007 case QImode:
5008 return (1 + (mode == DImode ? 4 : 0)
5009 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5011 case HImode:
5012 return (4 + (mode == DImode ? 4 : 0)
5013 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5015 case SImode:
5016 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5018 case V8QImode:
5019 case V4HImode:
5020 case V2SImode:
5021 case V4QImode:
5022 case V2HImode:
5023 return 1;
5025 default:
5026 gcc_unreachable ();
5028 gcc_unreachable ();
5030 case CONST_INT:
5031 if (const_ok_for_arm (INTVAL (x)))
5032 return outer == SET ? 2 : -1;
5033 else if (outer == AND
5034 && const_ok_for_arm (~INTVAL (x)))
5035 return -1;
5036 else if ((outer == COMPARE
5037 || outer == PLUS || outer == MINUS)
5038 && const_ok_for_arm (-INTVAL (x)))
5039 return -1;
5040 else
5041 return 5;
5043 case CONST:
5044 case LABEL_REF:
5045 case SYMBOL_REF:
5046 return 6;
5048 case CONST_DOUBLE:
5049 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5050 return outer == SET ? 2 : -1;
5051 else if ((outer == COMPARE || outer == PLUS)
5052 && neg_const_double_rtx_ok_for_fpa (x))
5053 return -1;
5054 return 7;
5056 default:
5057 return 99;
5061 /* RTX costs when optimizing for size. */
5062 static bool
5063 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5065 enum machine_mode mode = GET_MODE (x);
5067 if (TARGET_THUMB)
5069 /* XXX TBD. For now, use the standard costs. */
5070 *total = thumb1_rtx_costs (x, code, outer_code);
5071 return true;
5074 switch (code)
5076 case MEM:
5077 /* A memory access costs 1 insn if the mode is small, or the address is
5078 a single register; otherwise it costs one insn per word. */
5079 if (REG_P (XEXP (x, 0)))
5080 *total = COSTS_N_INSNS (1);
5081 else
5082 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5083 return true;
5085 case DIV:
5086 case MOD:
5087 case UDIV:
5088 case UMOD:
5089 /* Needs a libcall, so it costs about this. */
5090 *total = COSTS_N_INSNS (2);
5091 return false;
5093 case ROTATE:
5094 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5096 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5097 return true;
5099 /* Fall through */
5100 case ROTATERT:
5101 case ASHIFT:
5102 case LSHIFTRT:
5103 case ASHIFTRT:
5104 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5106 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5107 return true;
5109 else if (mode == SImode)
5111 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5112 /* Slightly disparage register shifts, but not by much. */
5113 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5114 *total += 1 + rtx_cost (XEXP (x, 1), code);
5115 return true;
5118 /* Needs a libcall. */
5119 *total = COSTS_N_INSNS (2);
5120 return false;
5122 case MINUS:
5123 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5125 *total = COSTS_N_INSNS (1);
5126 return false;
5129 if (mode == SImode)
5131 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5132 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5134 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5135 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5136 || subcode1 == ROTATE || subcode1 == ROTATERT
5137 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5138 || subcode1 == ASHIFTRT)
5140 /* It's just the cost of the two operands. */
5141 *total = 0;
5142 return false;
5145 *total = COSTS_N_INSNS (1);
5146 return false;
5149 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5150 return false;
5152 case PLUS:
5153 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5155 *total = COSTS_N_INSNS (1);
5156 return false;
5159 /* Fall through */
5160 case AND: case XOR: case IOR:
5161 if (mode == SImode)
5163 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5165 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5166 || subcode == LSHIFTRT || subcode == ASHIFTRT
5167 || (code == AND && subcode == NOT))
5169 /* It's just the cost of the two operands. */
5170 *total = 0;
5171 return false;
5175 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5176 return false;
5178 case MULT:
5179 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5180 return false;
5182 case NEG:
5183 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5184 *total = COSTS_N_INSNS (1);
5185 /* Fall through */
5186 case NOT:
5187 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5189 return false;
5191 case IF_THEN_ELSE:
5192 *total = 0;
5193 return false;
5195 case COMPARE:
5196 if (cc_register (XEXP (x, 0), VOIDmode))
5197 *total = 0;
5198 else
5199 *total = COSTS_N_INSNS (1);
5200 return false;
5202 case ABS:
5203 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5204 *total = COSTS_N_INSNS (1);
5205 else
5206 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5207 return false;
5209 case SIGN_EXTEND:
5210 *total = 0;
5211 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5213 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5214 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5216 if (mode == DImode)
5217 *total += COSTS_N_INSNS (1);
5218 return false;
5220 case ZERO_EXTEND:
5221 *total = 0;
5222 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5224 switch (GET_MODE (XEXP (x, 0)))
5226 case QImode:
5227 *total += COSTS_N_INSNS (1);
5228 break;
5230 case HImode:
5231 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5233 case SImode:
5234 break;
5236 default:
5237 *total += COSTS_N_INSNS (2);
5241 if (mode == DImode)
5242 *total += COSTS_N_INSNS (1);
5244 return false;
5246 case CONST_INT:
5247 if (const_ok_for_arm (INTVAL (x)))
5248 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5249 else if (const_ok_for_arm (~INTVAL (x)))
5250 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5251 else if (const_ok_for_arm (-INTVAL (x)))
5253 if (outer_code == COMPARE || outer_code == PLUS
5254 || outer_code == MINUS)
5255 *total = 0;
5256 else
5257 *total = COSTS_N_INSNS (1);
5259 else
5260 *total = COSTS_N_INSNS (2);
5261 return true;
5263 case CONST:
5264 case LABEL_REF:
5265 case SYMBOL_REF:
5266 *total = COSTS_N_INSNS (2);
5267 return true;
5269 case CONST_DOUBLE:
5270 *total = COSTS_N_INSNS (4);
5271 return true;
5273 default:
5274 if (mode != VOIDmode)
5275 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5276 else
5277 *total = COSTS_N_INSNS (4); /* Who knows? */
5278 return false;
5282 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5283 supported on any "slowmul" cores, so it can be ignored. */
5285 static bool
5286 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5288 enum machine_mode mode = GET_MODE (x);
5290 if (TARGET_THUMB)
5292 *total = thumb1_rtx_costs (x, code, outer_code);
5293 return true;
5296 switch (code)
5298 case MULT:
5299 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5300 || mode == DImode)
5302 *total = 30;
5303 return true;
5306 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5308 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5309 & (unsigned HOST_WIDE_INT) 0xffffffff);
5310 int cost, const_ok = const_ok_for_arm (i);
5311 int j, booth_unit_size;
5313 /* Tune as appropriate. */
5314 cost = const_ok ? 4 : 8;
5315 booth_unit_size = 2;
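	  /* A slow multiplier retires two bits of the constant per
	     Booth step; charge two units of cost for each step taken.  */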
5316 for (j = 0; i && j < 32; j += booth_unit_size)
5318 i >>= booth_unit_size;
5319 cost += 2;
5322 *total = cost;
5323 return true;
5326 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5327 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5328 return true;
5330 default:
5331 *total = arm_rtx_costs_1 (x, code, outer_code);
5332 return true;
5337 /* RTX cost for cores with a fast multiply unit (M variants). */
5339 static bool
5340 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5342 enum machine_mode mode = GET_MODE (x);
5344 if (TARGET_THUMB1)
5346 *total = thumb1_rtx_costs (x, code, outer_code);
5347 return true;
5350 /* ??? should thumb2 use different costs? */
5351 switch (code)
5353 case MULT:
5354 /* There is no point basing this on the tuning, since it is always the
5355 fast variant if it exists at all. */
5356 if (mode == DImode
5357 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5358 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5359 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5361 *total = 8;
5362 return true;
5366 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5367 || mode == DImode)
5369 *total = 30;
5370 return true;
5373 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5375 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5376 & (unsigned HOST_WIDE_INT) 0xffffffff);
5377 int cost, const_ok = const_ok_for_arm (i);
5378 int j, booth_unit_size;
5380 /* Tune as appropriate. */
5381 cost = const_ok ? 4 : 8;
5382 booth_unit_size = 8;
5383 for (j = 0; i && j < 32; j += booth_unit_size)
5385 i >>= booth_unit_size;
5386 cost += 2;
5389 *total = cost;
5390 return true;
5393 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5394 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5395 return true;
5397 default:
5398 *total = arm_rtx_costs_1 (x, code, outer_code);
5399 return true;
5404 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5405 so it can be ignored. */
5407 static bool
5408 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5410 enum machine_mode mode = GET_MODE (x);
5412 if (TARGET_THUMB)
5414 *total = thumb1_rtx_costs (x, code, outer_code);
5415 return true;
5418 switch (code)
5420 case MULT:
5421 /* There is no point basing this on the tuning, since it is always the
5422 fast variant if it exists at all. */
5423 if (mode == DImode
5424 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5425 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5426 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5428 *total = 8;
5429 return true;
5433 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5434 || mode == DImode)
5436 *total = 30;
5437 return true;
5440 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5442 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5443 & (unsigned HOST_WIDE_INT) 0xffffffff);
5444 int cost, const_ok = const_ok_for_arm (i);
5445 unsigned HOST_WIDE_INT masked_const;
5447 /* The cost will be related to two insns.
5448 First a load of the constant (MOV or LDR), then a multiply. */
5449 cost = 2;
5450 if (! const_ok)
5451 cost += 1; /* LDR is probably more expensive because
5452 of longer result latency. */
5453 masked_const = i & 0xffff8000;
5454 if (masked_const != 0 && masked_const != 0xffff8000)
5456 masked_const = i & 0xf8000000;
5457 if (masked_const == 0 || masked_const == 0xf8000000)
5458 cost += 1;
5459 else
5460 cost += 2;
5462 *total = cost;
5463 return true;
5466 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5467 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5468 return true;
5470 case COMPARE:
5471 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5472 will stall until the multiplication is complete. */
5473 if (GET_CODE (XEXP (x, 0)) == MULT)
5474 *total = 4 + rtx_cost (XEXP (x, 0), code);
5475 else
5476 *total = arm_rtx_costs_1 (x, code, outer_code);
5477 return true;
5479 default:
5480 *total = arm_rtx_costs_1 (x, code, outer_code);
5481 return true;
5486 /* RTX costs for 9e (and later) cores. */
5488 static bool
5489 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5491 enum machine_mode mode = GET_MODE (x);
5492 int nonreg_cost;
5493 int cost;
5495 if (TARGET_THUMB1)
5497 switch (code)
5499 case MULT:
5500 *total = COSTS_N_INSNS (3);
5501 return true;
5503 default:
5504 *total = thumb1_rtx_costs (x, code, outer_code);
5505 return true;
5509 switch (code)
5511 case MULT:
5512 /* There is no point basing this on the tuning, since it is always the
5513 fast variant if it exists at all. */
5514 if (mode == DImode
5515 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5516 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5517 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5519 *total = 3;
5520 return true;
5524 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5526 *total = 30;
5527 return true;
5529 if (mode == DImode)
5531 cost = 7;
5532 nonreg_cost = 8;
5534 else
5536 cost = 2;
5537 nonreg_cost = 4;
5541 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5542 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5543 return true;
5545 default:
5546 *total = arm_rtx_costs_1 (x, code, outer_code);
5547 return true;
5550 /* All address computations that can be done are free, but rtx cost returns
5551 the same for practically all of them. So we weight the different types
5552 of address here in the order (most preferred first):
5553 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5554 static inline int
5555 arm_arm_address_cost (rtx x)
5557 enum rtx_code c = GET_CODE (x);
5559 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5560 return 0;
5561 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5562 return 10;
5564 if (c == PLUS || c == MINUS)
5566 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5567 return 2;
5569 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5570 return 3;
5572 return 4;
5575 return 6;
5578 static inline int
5579 arm_thumb_address_cost (rtx x)
5581 enum rtx_code c = GET_CODE (x);
5583 if (c == REG)
5584 return 1;
5585 if (c == PLUS
5586 && GET_CODE (XEXP (x, 0)) == REG
5587 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5588 return 1;
5590 return 2;
5593 static int
5594 arm_address_cost (rtx x)
5596 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
5599 static int
5600 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5602 rtx i_pat, d_pat;
5604 /* Some true dependencies can have a higher cost depending
5605 on precisely how certain input operands are used. */
5606 if (arm_tune_xscale
5607 && REG_NOTE_KIND (link) == 0
5608 && recog_memoized (insn) >= 0
5609 && recog_memoized (dep) >= 0)
5611 int shift_opnum = get_attr_shift (insn);
5612 enum attr_type attr_type = get_attr_type (dep);
5614 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5615 operand for INSN. If we have a shifted input operand and the
5616 instruction we depend on is another ALU instruction, then we may
5617 have to account for an additional stall. */
5618 if (shift_opnum != 0
5619 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5621 rtx shifted_operand;
5622 int opno;
5624 /* Get the shifted operand. */
5625 extract_insn (insn);
5626 shifted_operand = recog_data.operand[shift_opnum];
5628 /* Iterate over all the operands in DEP. If we write an operand
5629 that overlaps with SHIFTED_OPERAND, then we have to increase the
5630 cost of this dependency. */
5631 extract_insn (dep);
5632 preprocess_constraints ();
5633 for (opno = 0; opno < recog_data.n_operands; opno++)
5635 /* We can ignore strict inputs. */
5636 if (recog_data.operand_type[opno] == OP_IN)
5637 continue;
5639 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5640 shifted_operand))
5641 return 2;
5646 /* XXX This is not strictly true for the FPA. */
5647 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5648 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5649 return 0;
5651 /* Call insns don't incur a stall, even if they follow a load. */
5652 if (REG_NOTE_KIND (link) == 0
5653 && GET_CODE (insn) == CALL_INSN)
5654 return 1;
5656 if ((i_pat = single_set (insn)) != NULL
5657 && GET_CODE (SET_SRC (i_pat)) == MEM
5658 && (d_pat = single_set (dep)) != NULL
5659 && GET_CODE (SET_DEST (d_pat)) == MEM)
5661 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5662 /* This is a load after a store; there is no conflict if the load reads
5663 from a cached area.  Assume that loads from the stack and from the
5664 constant pool are cached, and that others will miss.  This is a
5665 hack. */
5667 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5668 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5669 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5670 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5671 return 1;
5674 return cost;
5677 static int fp_consts_inited = 0;
5679 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5680 static const char * const strings_fp[8] =
5682 "0", "1", "2", "3",
5683 "4", "5", "0.5", "10"
5686 static REAL_VALUE_TYPE values_fp[8];
5688 static void
5689 init_fp_table (void)
5691 int i;
5692 REAL_VALUE_TYPE r;
5694 if (TARGET_VFP)
5695 fp_consts_inited = 1;
5696 else
5697 fp_consts_inited = 8;
5699 for (i = 0; i < fp_consts_inited; i++)
5701 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5702 values_fp[i] = r;
5706 /* Return TRUE if rtx X is a valid immediate FP constant. */
5708 arm_const_double_rtx (rtx x)
5710 REAL_VALUE_TYPE r;
5711 int i;
5713 if (!fp_consts_inited)
5714 init_fp_table ();
5716 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5717 if (REAL_VALUE_MINUS_ZERO (r))
5718 return 0;
5720 for (i = 0; i < fp_consts_inited; i++)
5721 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5722 return 1;
5724 return 0;
5727 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5729 neg_const_double_rtx_ok_for_fpa (rtx x)
5731 REAL_VALUE_TYPE r;
5732 int i;
5734 if (!fp_consts_inited)
5735 init_fp_table ();
5737 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5738 r = REAL_VALUE_NEGATE (r);
5739 if (REAL_VALUE_MINUS_ZERO (r))
5740 return 0;
5742 for (i = 0; i < 8; i++)
5743 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5744 return 1;
5746 return 0;
5750 /* VFPv3 has a fairly wide range of representable immediates, formed from
5751 "quarter-precision" floating-point values. These can be evaluated using this
5752 formula (with ^ for exponentiation):
5754 -1^s * n * 2^-r
5756 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5757 16 <= n <= 31 and 0 <= r <= 7.
5759 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5761 - A (most-significant) is the sign bit.
5762 - BCD are the exponent (encoded as r XOR 3).
5763 - EFGH are the mantissa (encoded as n - 16).
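/* For example, 1.0 = -1^0 * 16 * 2^-4, so s = 0, n = 16 and r = 4,
   giving the index 0 111 0000 in binary (0x70): sign 0, exponent
   4 XOR 3 = 7, mantissa 16 - 16 = 0.  */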
5766 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5767 fconst[sd] instruction, or -1 if X isn't suitable. */
5768 static int
5769 vfp3_const_double_index (rtx x)
5771 REAL_VALUE_TYPE r, m;
5772 int sign, exponent;
5773 unsigned HOST_WIDE_INT mantissa, mant_hi;
5774 unsigned HOST_WIDE_INT mask;
5775 HOST_WIDE_INT m1, m2;
5776 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5778 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5779 return -1;
5781 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5783 /* We can't represent these things, so detect them first. */
5784 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5785 return -1;
5787 /* Extract sign, exponent and mantissa. */
5788 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5789 r = REAL_VALUE_ABS (r);
5790 exponent = REAL_EXP (&r);
5791 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5792 highest (sign) bit, with a fixed binary point at bit point_pos.
5793 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5794 bits for the mantissa, this may fail (low bits would be lost). */
5795 real_ldexp (&m, &r, point_pos - exponent);
5796 REAL_VALUE_TO_INT (&m1, &m2, m);
5797 mantissa = m1;
5798 mant_hi = m2;
5800 /* If there are bits set in the low part of the mantissa, we can't
5801 represent this value. */
5802 if (mantissa != 0)
5803 return -1;
5805 /* Now make it so that mantissa contains the most-significant bits, and move
5806 the point_pos to indicate that the least-significant bits have been
5807 discarded. */
5808 point_pos -= HOST_BITS_PER_WIDE_INT;
5809 mantissa = mant_hi;
5811 /* We can permit four significant bits of mantissa only, plus a high bit
5812 which is always 1. */
5813 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5814 if ((mantissa & mask) != 0)
5815 return -1;
5817 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5818 mantissa >>= point_pos - 5;
5820 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5821 floating-point immediate zero with Neon using an integer-zero load, but
5822 that case is handled elsewhere.) */
5823 if (mantissa == 0)
5824 return -1;
5826 gcc_assert (mantissa >= 16 && mantissa <= 31);
5828 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5829 normalized significands are in the range [1, 2). (Our mantissa is shifted
5830 left 4 places at this point relative to normalized IEEE754 values). GCC
5831 internally uses [0.5, 1) (see real.c), so the exponent returned from
5832 REAL_EXP must be altered. */
5833 exponent = 5 - exponent;
5835 if (exponent < 0 || exponent > 7)
5836 return -1;
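/* Added cross-check: for 1.0, GCC's internal significand is 0.5 with
   REAL_EXP = 1, so the adjustment above yields exponent = 5 - 1 = 4,
   i.e. r = 4, agreeing with the 16 * 2^-4 decomposition in the
   quarter-precision comment before this function.  */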
5838 /* Sign, mantissa and exponent are now in the correct form to plug into the
5839 formula described in the comment above. */
5840 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5843 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5845 vfp3_const_double_rtx (rtx x)
5847 if (!TARGET_VFP3)
5848 return 0;
5850 return vfp3_const_double_index (x) != -1;
5853 /* Recognize immediates which can be used in various Neon instructions. Legal
5854 immediates are described by the following table (for VMVN variants, the
5855 bitwise inverse of the constant shown is recognized. In either case, VMOV
5856 is output and the correct instruction to use for a given constant is chosen
5857 by the assembler). The constant shown is replicated across all elements of
5858 the destination vector.
5860 insn elems variant constant (binary)
5861 ---- ----- ------- -----------------
5862 vmov i32 0 00000000 00000000 00000000 abcdefgh
5863 vmov i32 1 00000000 00000000 abcdefgh 00000000
5864 vmov i32 2 00000000 abcdefgh 00000000 00000000
5865 vmov i32 3 abcdefgh 00000000 00000000 00000000
5866 vmov i16 4 00000000 abcdefgh
5867 vmov i16 5 abcdefgh 00000000
5868 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5869 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5870 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5871 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5872 vmvn i16 10 00000000 abcdefgh
5873 vmvn i16 11 abcdefgh 00000000
5874 vmov i32 12 00000000 00000000 abcdefgh 11111111
5875 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5876 vmov i32 14 00000000 abcdefgh 11111111 11111111
5877 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5878 vmov i8 16 abcdefgh
5879 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5880 eeeeeeee ffffffff gggggggg hhhhhhhh
5881 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5883 For case 18, B = !b. Representable values are exactly those accepted by
5884 vfp3_const_double_index, but are output as floating-point numbers rather
5885 than indices.
5887 Variants 0-5 (inclusive) may also be used as immediates for the second
5888 operand of VORR/VBIC instructions.
5890 The INVERSE argument causes the bitwise inverse of the given operand to be
5891 recognized instead (used for recognizing legal immediates for the VAND/VORN
5892 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5893 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5894 output, rather than the real insns vbic/vorr).
5896 INVERSE makes no difference to the recognition of float vectors.
5898 The return value is the variant of immediate as shown in the above table, or
5899 -1 if the given value doesn't match any of the listed patterns.
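/* Hedged example of the above: a V4SImode vector whose four elements all
   equal 0x0000ab00 splats to the repeating byte pattern 00 ab 00 00,
   matches variant 1 with element width 32, and (for INVERSE zero) yields
   *MODCONST = 0xab00.  */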
5901 static int
5902 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5903 rtx *modconst, int *elementwidth)
5905 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5906 matches = 1; \
5907 for (i = 0; i < idx; i += (STRIDE)) \
5908 if (!(TEST)) \
5909 matches = 0; \
5910 if (matches) \
5911 { \
5912 immtype = (CLASS); \
5913 elsize = (ELSIZE); \
5914 break; \
5915 }
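/* Added note: each CHECK invocation below tests whether the byte image of
   the constant repeats with the given STRIDE and satisfies TEST at every
   offset; the first match records the variant (CLASS) and element size
   (ELSIZE) and breaks out of the enclosing do-while dispatch.  */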
5917 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5918 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5919 unsigned char bytes[16];
5920 int immtype = -1, matches;
5921 unsigned int invmask = inverse ? 0xff : 0;
5923 /* Vectors of float constants. */
5924 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5926 rtx el0 = CONST_VECTOR_ELT (op, 0);
5927 REAL_VALUE_TYPE r0;
5929 if (!vfp3_const_double_rtx (el0))
5930 return -1;
5932 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5934 for (i = 1; i < n_elts; i++)
5936 rtx elt = CONST_VECTOR_ELT (op, i);
5937 REAL_VALUE_TYPE re;
5939 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5941 if (!REAL_VALUES_EQUAL (r0, re))
5942 return -1;
5945 if (modconst)
5946 *modconst = CONST_VECTOR_ELT (op, 0);
5948 if (elementwidth)
5949 *elementwidth = 0;
5951 return 18;
5954 /* Splat vector constant out into a byte vector. */
5955 for (i = 0; i < n_elts; i++)
5957 rtx el = CONST_VECTOR_ELT (op, i);
5958 unsigned HOST_WIDE_INT elpart;
5959 unsigned int part, parts;
5961 if (GET_CODE (el) == CONST_INT)
5963 elpart = INTVAL (el);
5964 parts = 1;
5966 else if (GET_CODE (el) == CONST_DOUBLE)
5968 elpart = CONST_DOUBLE_LOW (el);
5969 parts = 2;
5971 else
5972 gcc_unreachable ();
5974 for (part = 0; part < parts; part++)
5976 unsigned int byte;
5977 for (byte = 0; byte < innersize; byte++)
5979 bytes[idx++] = (elpart & 0xff) ^ invmask;
5980 elpart >>= BITS_PER_UNIT;
5982 if (GET_CODE (el) == CONST_DOUBLE)
5983 elpart = CONST_DOUBLE_HIGH (el);
5987 /* Sanity check. */
5988 gcc_assert (idx == GET_MODE_SIZE (mode));
5990 do
5991 {
5992 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
5993 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
5995 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
5996 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
5998 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
5999 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6001 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6002 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6004 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6006 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6008 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6009 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6011 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6012 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6014 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6015 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6017 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6018 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6020 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6022 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6024 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6025 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6027 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6028 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6030 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6031 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6033 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6034 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6036 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6038 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6039 && bytes[i] == bytes[(i + 8) % idx]);
6041 while (0);
6043 if (immtype == -1)
6044 return -1;
6046 if (elementwidth)
6047 *elementwidth = elsize;
6049 if (modconst)
6051 unsigned HOST_WIDE_INT imm = 0;
6053 /* Un-invert bytes of recognized vector, if necessary. */
6054 if (invmask != 0)
6055 for (i = 0; i < idx; i++)
6056 bytes[i] ^= invmask;
6058 if (immtype == 17)
6060 /* FIXME: Broken on 32-bit H_W_I hosts. */
6061 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6063 for (i = 0; i < 8; i++)
6064 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6065 << (i * BITS_PER_UNIT);
6067 *modconst = GEN_INT (imm);
6069 else
6071 unsigned HOST_WIDE_INT imm = 0;
6073 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6074 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6076 *modconst = GEN_INT (imm);
6080 return immtype;
6081 #undef CHECK
6084 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6085 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6086 float elements), and a modified constant (whatever should be output for a
6087 VMOV) in *MODCONST. */
6090 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6091 rtx *modconst, int *elementwidth)
6093 rtx tmpconst;
6094 int tmpwidth;
6095 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6097 if (retval == -1)
6098 return 0;
6100 if (modconst)
6101 *modconst = tmpconst;
6103 if (elementwidth)
6104 *elementwidth = tmpwidth;
6106 return 1;
6109 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6110 the immediate is valid, write a constant suitable for using as an operand
6111 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6112 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6115 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6116 rtx *modconst, int *elementwidth)
6118 rtx tmpconst;
6119 int tmpwidth;
6120 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6122 if (retval < 0 || retval > 5)
6123 return 0;
6125 if (modconst)
6126 *modconst = tmpconst;
6128 if (elementwidth)
6129 *elementwidth = tmpwidth;
6131 return 1;
6134 /* Return a string suitable for output of Neon immediate logic operation
6135 MNEM. */
6137 char *
6138 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6139 int inverse, int quad)
6141 int width, is_valid;
6142 static char templ[40];
6144 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6146 gcc_assert (is_valid != 0);
6148 if (quad)
6149 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6150 else
6151 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6153 return templ;
6156 /* Output a sequence of pairwise operations to implement a reduction.
6157 NOTE: We do "too much work" here, because pairwise operations work on two
6158 registers' worth of operands in one go.  Unfortunately, we do not believe those
6159 extra calculations can be exploited to do the full operation in fewer steps.
6160 Although all vector elements of the result but the first are ignored, we
6161 actually calculate the same result in each of the elements. An alternative
6162 such as initially loading a vector with zero to use as each of the second
6163 operands would use up an additional register and take an extra instruction,
6164 for no particular gain. */
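/* Added illustration: for a four-element vector the loop below runs
   twice (i = 2, then i = 1); each pairwise step halves the number of
   distinct partial sums, and the final step writes into OP0 directly.  */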
6166 void
6167 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6168 rtx (*reduc) (rtx, rtx, rtx))
6170 enum machine_mode inner = GET_MODE_INNER (mode);
6171 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6172 rtx tmpsum = op1;
6174 for (i = parts / 2; i >= 1; i /= 2)
6176 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6177 emit_insn (reduc (dest, tmpsum, tmpsum));
6178 tmpsum = dest;
6182 /* Initialize a vector with non-constant elements. FIXME: We can do better
6183 than the current implementation (building a vector on the stack and then
6184 loading it) in many cases. See rs6000.c. */
6186 void
6187 neon_expand_vector_init (rtx target, rtx vals)
6189 enum machine_mode mode = GET_MODE (target);
6190 enum machine_mode inner = GET_MODE_INNER (mode);
6191 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6192 rtx mem;
6194 gcc_assert (VECTOR_MODE_P (mode));
6196 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6197 for (i = 0; i < n_elts; i++)
6198 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6199 XVECEXP (vals, 0, i));
6201 emit_move_insn (target, mem);
6204 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6205 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6206 reported source locations are bogus. */
6208 static void
6209 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6210 const char *err)
6212 HOST_WIDE_INT lane;
6214 gcc_assert (GET_CODE (operand) == CONST_INT);
6216 lane = INTVAL (operand);
6218 if (lane < low || lane >= high)
6219 error (err);
6222 /* Bounds-check lanes. */
6224 void
6225 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6227 bounds_check (operand, low, high, "lane out of range");
6230 /* Bounds-check constants. */
6232 void
6233 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6235 bounds_check (operand, low, high, "constant out of range");
6238 HOST_WIDE_INT
6239 neon_element_bits (enum machine_mode mode)
6241 if (mode == DImode)
6242 return GET_MODE_BITSIZE (mode);
6243 else
6244 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6248 /* Predicates for `match_operand' and `match_operator'. */
6250 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6252 cirrus_memory_offset (rtx op)
6254 /* Reject eliminable registers. */
6255 if (! (reload_in_progress || reload_completed)
6256 && ( reg_mentioned_p (frame_pointer_rtx, op)
6257 || reg_mentioned_p (arg_pointer_rtx, op)
6258 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6259 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6260 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6261 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6262 return 0;
6264 if (GET_CODE (op) == MEM)
6266 rtx ind;
6268 ind = XEXP (op, 0);
6270 /* Match: (mem (reg)). */
6271 if (GET_CODE (ind) == REG)
6272 return 1;
6274 /* Match:
6275 (mem (plus (reg)
6276 (const))). */
6277 if (GET_CODE (ind) == PLUS
6278 && GET_CODE (XEXP (ind, 0)) == REG
6279 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6280 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6281 return 1;
6284 return 0;
6287 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6288 WB is true if full writeback address modes are allowed and is false
6289 if limited writeback address modes (POST_INC and PRE_DEC) are
6290 allowed. */
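/* Added illustration: with WB false this accepts [rn] and [rn, #off]
   for word-aligned offsets -1020 <= off <= 1020, plus the limited
   writeback forms POST_INC and PRE_DEC; PRE_INC, POST_DEC and the
   {PRE,POST}_MODIFY forms additionally require WB to be true.  */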
6293 arm_coproc_mem_operand (rtx op, bool wb)
6295 rtx ind;
6297 /* Reject eliminable registers. */
6298 if (! (reload_in_progress || reload_completed)
6299 && ( reg_mentioned_p (frame_pointer_rtx, op)
6300 || reg_mentioned_p (arg_pointer_rtx, op)
6301 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6302 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6303 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6304 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6305 return FALSE;
6307 /* Constants are converted into offsets from labels. */
6308 if (GET_CODE (op) != MEM)
6309 return FALSE;
6311 ind = XEXP (op, 0);
6313 if (reload_completed
6314 && (GET_CODE (ind) == LABEL_REF
6315 || (GET_CODE (ind) == CONST
6316 && GET_CODE (XEXP (ind, 0)) == PLUS
6317 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6318 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6319 return TRUE;
6321 /* Match: (mem (reg)). */
6322 if (GET_CODE (ind) == REG)
6323 return arm_address_register_rtx_p (ind, 0);
6325 /* Auto-increment addressing modes.  POST_INC and PRE_DEC are
6326 acceptable in any case (subject to verification by
6327 arm_address_register_rtx_p). We need WB to be true to accept
6328 PRE_INC and POST_DEC. */
6329 if (GET_CODE (ind) == POST_INC
6330 || GET_CODE (ind) == PRE_DEC
6331 || (wb
6332 && (GET_CODE (ind) == PRE_INC
6333 || GET_CODE (ind) == POST_DEC)))
6334 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6336 if (wb
6337 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6338 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6339 && GET_CODE (XEXP (ind, 1)) == PLUS
6340 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6341 ind = XEXP (ind, 1);
6343 /* Match:
6344 (plus (reg)
6345 (const)). */
6346 if (GET_CODE (ind) == PLUS
6347 && GET_CODE (XEXP (ind, 0)) == REG
6348 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6349 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6350 && INTVAL (XEXP (ind, 1)) > -1024
6351 && INTVAL (XEXP (ind, 1)) < 1024
6352 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6353 return TRUE;
6355 return FALSE;
6358 /* Return TRUE if OP is a memory operand which we can load or store a vector
6359 to/from. If CORE is true, we're moving from ARM registers not Neon
6360 registers. */
6362 neon_vector_mem_operand (rtx op, bool core)
6364 rtx ind;
6366 /* Reject eliminable registers. */
6367 if (! (reload_in_progress || reload_completed)
6368 && ( reg_mentioned_p (frame_pointer_rtx, op)
6369 || reg_mentioned_p (arg_pointer_rtx, op)
6370 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6371 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6372 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6373 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6374 return FALSE;
6376 /* Constants are converted into offsets from labels. */
6377 if (GET_CODE (op) != MEM)
6378 return FALSE;
6380 ind = XEXP (op, 0);
6382 if (reload_completed
6383 && (GET_CODE (ind) == LABEL_REF
6384 || (GET_CODE (ind) == CONST
6385 && GET_CODE (XEXP (ind, 0)) == PLUS
6386 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6387 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6388 return TRUE;
6390 /* Match: (mem (reg)). */
6391 if (GET_CODE (ind) == REG)
6392 return arm_address_register_rtx_p (ind, 0);
6394 /* Allow post-increment with Neon registers. */
6395 if (!core && GET_CODE (ind) == POST_INC)
6396 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6398 #if 0
6399 /* FIXME: We can support this too if we use VLD1/VST1. */
6400 if (!core
6401 && GET_CODE (ind) == POST_MODIFY
6402 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6403 && GET_CODE (XEXP (ind, 1)) == PLUS
6404 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6405 ind = XEXP (ind, 1);
6406 #endif
6408 /* Match:
6409 (plus (reg)
6410 (const)). */
6411 if (!core
6412 && GET_CODE (ind) == PLUS
6413 && GET_CODE (XEXP (ind, 0)) == REG
6414 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6415 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6416 && INTVAL (XEXP (ind, 1)) > -1024
6417 && INTVAL (XEXP (ind, 1)) < 1016
6418 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6419 return TRUE;
6421 return FALSE;
6424 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6425 type. */
6427 neon_struct_mem_operand (rtx op)
6429 rtx ind;
6431 /* Reject eliminable registers. */
6432 if (! (reload_in_progress || reload_completed)
6433 && ( reg_mentioned_p (frame_pointer_rtx, op)
6434 || reg_mentioned_p (arg_pointer_rtx, op)
6435 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6436 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6437 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6438 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6439 return FALSE;
6441 /* Constants are converted into offsets from labels. */
6442 if (GET_CODE (op) != MEM)
6443 return FALSE;
6445 ind = XEXP (op, 0);
6447 if (reload_completed
6448 && (GET_CODE (ind) == LABEL_REF
6449 || (GET_CODE (ind) == CONST
6450 && GET_CODE (XEXP (ind, 0)) == PLUS
6451 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6452 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6453 return TRUE;
6455 /* Match: (mem (reg)). */
6456 if (GET_CODE (ind) == REG)
6457 return arm_address_register_rtx_p (ind, 0);
6459 return FALSE;
6462 /* Return true if X is a register that will be eliminated later on. */
6464 arm_eliminable_register (rtx x)
6466 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6467 || REGNO (x) == ARG_POINTER_REGNUM
6468 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6469 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6472 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
6473 coprocessor registers. Otherwise return NO_REGS. */
6475 enum reg_class
6476 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6478 if (TARGET_NEON
6479 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6480 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6481 && neon_vector_mem_operand (x, FALSE))
6482 return NO_REGS;
6484 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6485 return NO_REGS;
6487 return GENERAL_REGS;
6490 /* Values which must be returned in the most-significant end of the return
6491 register. */
6493 static bool
6494 arm_return_in_msb (const_tree valtype)
6496 return (TARGET_AAPCS_BASED
6497 && BYTES_BIG_ENDIAN
6498 && (AGGREGATE_TYPE_P (valtype)
6499 || TREE_CODE (valtype) == COMPLEX_TYPE));
6502 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6503 Used by the Cirrus Maverick code, which has to work around
6504 a hardware bug triggered by such instructions.  */
6505 static bool
6506 arm_memory_load_p (rtx insn)
6508 rtx body, lhs, rhs;
6510 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6511 return false;
6513 body = PATTERN (insn);
6515 if (GET_CODE (body) != SET)
6516 return false;
6518 lhs = XEXP (body, 0);
6519 rhs = XEXP (body, 1);
6521 lhs = REG_OR_SUBREG_RTX (lhs);
6523 /* If the destination is not a general purpose
6524 register we do not have to worry. */
6525 if (GET_CODE (lhs) != REG
6526 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6527 return false;
6529 /* As well as loads from memory we also have to react
6530 to loads of invalid constants which will be turned
6531 into loads from the minipool. */
6532 return (GET_CODE (rhs) == MEM
6533 || GET_CODE (rhs) == SYMBOL_REF
6534 || note_invalid_constants (insn, -1, false));
6537 /* Return TRUE if INSN is a Cirrus instruction. */
6538 static bool
6539 arm_cirrus_insn_p (rtx insn)
6541 enum attr_cirrus attr;
6543 /* get_attr cannot accept USE or CLOBBER. */
6544 if (!insn
6545 || GET_CODE (insn) != INSN
6546 || GET_CODE (PATTERN (insn)) == USE
6547 || GET_CODE (PATTERN (insn)) == CLOBBER)
6548 return 0;
6550 attr = get_attr_cirrus (insn);
6552 return attr != CIRRUS_NOT;
6555 /* Cirrus reorg for invalid instruction combinations. */
6556 static void
6557 cirrus_reorg (rtx first)
6559 enum attr_cirrus attr;
6560 rtx body = PATTERN (first);
6561 rtx t;
6562 int nops;
6564 /* Any branch must be followed by 2 non Cirrus instructions. */
6565 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6567 nops = 0;
6568 t = next_nonnote_insn (first);
6570 if (arm_cirrus_insn_p (t))
6571 ++ nops;
6573 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6574 ++ nops;
6576 while (nops --)
6577 emit_insn_after (gen_nop (), first);
6579 return;
6582 /* (float (blah)) is in parallel with a clobber. */
6583 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6584 body = XVECEXP (body, 0, 0);
6586 if (GET_CODE (body) == SET)
6588 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6590 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6591 be followed by a non Cirrus insn. */
6592 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6594 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6595 emit_insn_after (gen_nop (), first);
6597 return;
6599 else if (arm_memory_load_p (first))
6601 unsigned int arm_regno;
6603 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6604 ldr/cfmv64hr combination where the Rd field is the same
6605 in both instructions must be split with a non Cirrus
6606 insn. Example:
6608 ldr r0, blah
6610 cfmvsr mvf0, r0. */
6612 /* Get Arm register number for ldr insn. */
6613 if (GET_CODE (lhs) == REG)
6614 arm_regno = REGNO (lhs);
6615 else
6617 gcc_assert (GET_CODE (rhs) == REG);
6618 arm_regno = REGNO (rhs);
6621 /* Next insn. */
6622 first = next_nonnote_insn (first);
6624 if (! arm_cirrus_insn_p (first))
6625 return;
6627 body = PATTERN (first);
6629 /* (float (blah)) is in parallel with a clobber. */
6630 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6631 body = XVECEXP (body, 0, 0);
6633 if (GET_CODE (body) == FLOAT)
6634 body = XEXP (body, 0);
6636 if (get_attr_cirrus (first) == CIRRUS_MOVE
6637 && GET_CODE (XEXP (body, 1)) == REG
6638 && arm_regno == REGNO (XEXP (body, 1)))
6639 emit_insn_after (gen_nop (), first);
6641 return;
6645 /* get_attr cannot accept USE or CLOBBER. */
6646 if (!first
6647 || GET_CODE (first) != INSN
6648 || GET_CODE (PATTERN (first)) == USE
6649 || GET_CODE (PATTERN (first)) == CLOBBER)
6650 return;
6652 attr = get_attr_cirrus (first);
6654 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6655 must be followed by a non-coprocessor instruction. */
6656 if (attr == CIRRUS_COMPARE)
6658 nops = 0;
6660 t = next_nonnote_insn (first);
6662 if (arm_cirrus_insn_p (t))
6663 ++ nops;
6665 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6666 ++ nops;
6668 while (nops --)
6669 emit_insn_after (gen_nop (), first);
6671 return;
6675 /* Return TRUE if X references a SYMBOL_REF. */
6677 symbol_mentioned_p (rtx x)
6679 const char * fmt;
6680 int i;
6682 if (GET_CODE (x) == SYMBOL_REF)
6683 return 1;
6685 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6686 are constant offsets, not symbols. */
6687 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6688 return 0;
6690 fmt = GET_RTX_FORMAT (GET_CODE (x));
6692 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6694 if (fmt[i] == 'E')
6696 int j;
6698 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6699 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6700 return 1;
6702 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6703 return 1;
6706 return 0;
6709 /* Return TRUE if X references a LABEL_REF. */
6711 label_mentioned_p (rtx x)
6713 const char * fmt;
6714 int i;
6716 if (GET_CODE (x) == LABEL_REF)
6717 return 1;
6719 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6720 instruction, but they are constant offsets, not symbols. */
6721 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6722 return 0;
6724 fmt = GET_RTX_FORMAT (GET_CODE (x));
6725 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6727 if (fmt[i] == 'E')
6729 int j;
6731 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6732 if (label_mentioned_p (XVECEXP (x, i, j)))
6733 return 1;
6735 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6736 return 1;
6739 return 0;
6743 tls_mentioned_p (rtx x)
6745 switch (GET_CODE (x))
6747 case CONST:
6748 return tls_mentioned_p (XEXP (x, 0));
6750 case UNSPEC:
6751 if (XINT (x, 1) == UNSPEC_TLS)
6752 return 1;
6754 default:
6755 return 0;
6759 /* Must not copy a SET whose source operand is PC-relative. */
6761 static bool
6762 arm_cannot_copy_insn_p (rtx insn)
6764 rtx pat = PATTERN (insn);
6766 if (GET_CODE (pat) == SET)
6768 rtx rhs = SET_SRC (pat);
6770 if (GET_CODE (rhs) == UNSPEC
6771 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6772 return TRUE;
6774 if (GET_CODE (rhs) == MEM
6775 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6776 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6777 return TRUE;
6780 return FALSE;
6783 enum rtx_code
6784 minmax_code (rtx x)
6786 enum rtx_code code = GET_CODE (x);
6788 switch (code)
6790 case SMAX:
6791 return GE;
6792 case SMIN:
6793 return LE;
6794 case UMIN:
6795 return LEU;
6796 case UMAX:
6797 return GEU;
6798 default:
6799 gcc_unreachable ();
6803 /* Return 1 if memory locations are adjacent. */
6805 adjacent_mem_locations (rtx a, rtx b)
6807 /* We don't guarantee to preserve the order of these memory refs. */
6808 if (volatile_refs_p (a) || volatile_refs_p (b))
6809 return 0;
6811 if ((GET_CODE (XEXP (a, 0)) == REG
6812 || (GET_CODE (XEXP (a, 0)) == PLUS
6813 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6814 && (GET_CODE (XEXP (b, 0)) == REG
6815 || (GET_CODE (XEXP (b, 0)) == PLUS
6816 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6818 HOST_WIDE_INT val0 = 0, val1 = 0;
6819 rtx reg0, reg1;
6820 int val_diff;
6822 if (GET_CODE (XEXP (a, 0)) == PLUS)
6824 reg0 = XEXP (XEXP (a, 0), 0);
6825 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6827 else
6828 reg0 = XEXP (a, 0);
6830 if (GET_CODE (XEXP (b, 0)) == PLUS)
6832 reg1 = XEXP (XEXP (b, 0), 0);
6833 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6835 else
6836 reg1 = XEXP (b, 0);
6838 /* Don't accept any offset that will require multiple
6839 instructions to handle, since this would cause the
6840 arith_adjacentmem pattern to output an overlong sequence. */
6841 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6842 return 0;
6844 /* Don't allow an eliminable register: register elimination can make
6845 the offset too large. */
6846 if (arm_eliminable_register (reg0))
6847 return 0;
6849 val_diff = val1 - val0;
6851 if (arm_ld_sched)
6853 /* If the target has load delay slots, then there's no benefit
6854 to using an ldm instruction unless the offset is zero and
6855 we are optimizing for size. */
6856 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6857 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6858 && (val_diff == 4 || val_diff == -4));
6861 return ((REGNO (reg0) == REGNO (reg1))
6862 && (val_diff == 4 || val_diff == -4));
6865 return 0;
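/* Added example: MEMs addressed as (plus r4 8) and (plus r4 12) are
   adjacent (same base register, offsets differing by 4); on arm_ld_sched
   cores such a pair is only accepted when optimizing for size and one of
   the offsets is 0 or 4.  */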
6869 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6870 HOST_WIDE_INT *load_offset)
6872 int unsorted_regs[4];
6873 HOST_WIDE_INT unsorted_offsets[4];
6874 int order[4];
6875 int base_reg = -1;
6876 int i;
6878 /* Can only handle 2, 3, or 4 insns at present,
6879 though could be easily extended if required. */
6880 gcc_assert (nops >= 2 && nops <= 4);
6882 /* Loop over the operands and check that the memory references are
6883 suitable (i.e. immediate offsets from the same base register). At
6884 the same time, extract the target register, and the memory
6885 offsets. */
6886 for (i = 0; i < nops; i++)
6888 rtx reg;
6889 rtx offset;
6891 /* Convert a subreg of a mem into the mem itself. */
6892 if (GET_CODE (operands[nops + i]) == SUBREG)
6893 operands[nops + i] = alter_subreg (operands + (nops + i));
6895 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6897 /* Don't reorder volatile memory references; it doesn't seem worth
6898 looking for the case where the order is ok anyway. */
6899 if (MEM_VOLATILE_P (operands[nops + i]))
6900 return 0;
6902 offset = const0_rtx;
6904 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6905 || (GET_CODE (reg) == SUBREG
6906 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6907 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6908 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6909 == REG)
6910 || (GET_CODE (reg) == SUBREG
6911 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6912 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6913 == CONST_INT)))
6915 if (i == 0)
6917 base_reg = REGNO (reg);
6918 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6919 ? REGNO (operands[i])
6920 : REGNO (SUBREG_REG (operands[i])));
6921 order[0] = 0;
6923 else
6925 if (base_reg != (int) REGNO (reg))
6926 /* Not addressed from the same base register. */
6927 return 0;
6929 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6930 ? REGNO (operands[i])
6931 : REGNO (SUBREG_REG (operands[i])));
6932 if (unsorted_regs[i] < unsorted_regs[order[0]])
6933 order[0] = i;
6936 /* If it isn't an integer register, or if it overwrites the
6937 base register but isn't the last insn in the list, then
6938 we can't do this. */
6939 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6940 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6941 return 0;
6943 unsorted_offsets[i] = INTVAL (offset);
6945 else
6946 /* Not a suitable memory address. */
6947 return 0;
6950 /* All the useful information has now been extracted from the
6951 operands into unsorted_regs and unsorted_offsets; additionally,
6952 order[0] has been set to the lowest numbered register in the
6953 list. Sort the registers into order, and check that the memory
6954 offsets are ascending and adjacent. */
6956 for (i = 1; i < nops; i++)
6958 int j;
6960 order[i] = order[i - 1];
6961 for (j = 0; j < nops; j++)
6962 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6963 && (order[i] == order[i - 1]
6964 || unsorted_regs[j] < unsorted_regs[order[i]]))
6965 order[i] = j;
6967 /* Have we found a suitable register?  If not, one must be used more
6968 than once. */
6969 if (order[i] == order[i - 1])
6970 return 0;
6972 /* Is the memory address adjacent and ascending? */
6973 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6974 return 0;
6977 if (base)
6979 *base = base_reg;
6981 for (i = 0; i < nops; i++)
6982 regs[i] = unsorted_regs[order[i]];
6984 *load_offset = unsorted_offsets[order[0]];
6987 if (unsorted_offsets[order[0]] == 0)
6988 return 1; /* ldmia */
6990 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
6991 return 2; /* ldmib */
6993 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
6994 return 3; /* ldmda */
6996 if (unsorted_offsets[order[nops - 1]] == -4)
6997 return 4; /* ldmdb */
6999 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
7000 if the offset isn't small enough. The reason 2 ldrs are faster
7001 is because these ARMs are able to do more than one cache access
7002 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7003 whilst the ARM8 has a double bandwidth cache. This means that
7004 these cores can do both an instruction fetch and a data fetch in
7005 a single cycle, so the trick of calculating the address into a
7006 scratch register (one of the result regs) and then doing a load
7007 multiple actually becomes slower (and no smaller in code size).
7008 That is the transformation
7010 ldr rd1, [rbase + offset]
7011 ldr rd2, [rbase + offset + 4]
7015 add rd1, rbase, offset
7016 ldmia rd1, {rd1, rd2}
7018 produces worse code -- '3 cycles + any stalls on rd2' instead of
7019 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7020 access per cycle, the first sequence could never complete in less
7021 than 6 cycles, whereas the ldm sequence would only take 5 and
7022 would make better use of sequential accesses if not hitting the
7023 cache.
7025 We cheat here and test 'arm_ld_sched' which we currently know to
7026 only be true for the ARM8, ARM9 and StrongARM. If this ever
7027 changes, then the test below needs to be reworked. */
7028 if (nops == 2 && arm_ld_sched)
7029 return 0;
7031 /* Can't do it without setting up the offset, only do this if it takes
7032 no more than one insn. */
7033 return (const_ok_for_arm (unsorted_offsets[order[0]])
7034 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
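/* Added sketch of how emit_ldm_seq below uses these return values: 1 with
   regs = {r0, r1, r2} and base r4 becomes roughly "ldmia r4, {r0, r1, r2}",
   while 5 first materializes base + offset into regs[0] with an add (or
   sub) and then emits an ldmia from that register.  */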
7037 const char *
7038 emit_ldm_seq (rtx *operands, int nops)
7040 int regs[4];
7041 int base_reg;
7042 HOST_WIDE_INT offset;
7043 char buf[100];
7044 int i;
7046 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7048 case 1:
7049 strcpy (buf, "ldm%(ia%)\t");
7050 break;
7052 case 2:
7053 strcpy (buf, "ldm%(ib%)\t");
7054 break;
7056 case 3:
7057 strcpy (buf, "ldm%(da%)\t");
7058 break;
7060 case 4:
7061 strcpy (buf, "ldm%(db%)\t");
7062 break;
7064 case 5:
7065 if (offset >= 0)
7066 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7067 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7068 (long) offset);
7069 else
7070 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7071 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7072 (long) -offset);
7073 output_asm_insn (buf, operands);
7074 base_reg = regs[0];
7075 strcpy (buf, "ldm%(ia%)\t");
7076 break;
7078 default:
7079 gcc_unreachable ();
7082 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7083 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7085 for (i = 1; i < nops; i++)
7086 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7087 reg_names[regs[i]]);
7089 strcat (buf, "}\t%@ phole ldm");
7091 output_asm_insn (buf, operands);
7092 return "";
7096 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7097 HOST_WIDE_INT * load_offset)
7099 int unsorted_regs[4];
7100 HOST_WIDE_INT unsorted_offsets[4];
7101 int order[4];
7102 int base_reg = -1;
7103 int i;
7105 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7106 extended if required. */
7107 gcc_assert (nops >= 2 && nops <= 4);
7109 /* Loop over the operands and check that the memory references are
7110 suitable (i.e. immediate offsets from the same base register). At
7111 the same time, extract the target register, and the memory
7112 offsets. */
7113 for (i = 0; i < nops; i++)
7115 rtx reg;
7116 rtx offset;
7118 /* Convert a subreg of a mem into the mem itself. */
7119 if (GET_CODE (operands[nops + i]) == SUBREG)
7120 operands[nops + i] = alter_subreg (operands + (nops + i));
7122 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7124 /* Don't reorder volatile memory references; it doesn't seem worth
7125 looking for the case where the order is ok anyway. */
7126 if (MEM_VOLATILE_P (operands[nops + i]))
7127 return 0;
7129 offset = const0_rtx;
7131 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7132 || (GET_CODE (reg) == SUBREG
7133 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7134 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7135 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7136 == REG)
7137 || (GET_CODE (reg) == SUBREG
7138 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7139 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7140 == CONST_INT)))
7142 if (i == 0)
7144 base_reg = REGNO (reg);
7145 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7146 ? REGNO (operands[i])
7147 : REGNO (SUBREG_REG (operands[i])));
7148 order[0] = 0;
7150 else
7152 if (base_reg != (int) REGNO (reg))
7153 /* Not addressed from the same base register. */
7154 return 0;
7156 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7157 ? REGNO (operands[i])
7158 : REGNO (SUBREG_REG (operands[i])));
7159 if (unsorted_regs[i] < unsorted_regs[order[0]])
7160 order[0] = i;
7163 /* If it isn't an integer register, then we can't do this. */
7164 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7165 return 0;
7167 unsorted_offsets[i] = INTVAL (offset);
7169 else
7170 /* Not a suitable memory address. */
7171 return 0;
7174 /* All the useful information has now been extracted from the
7175 operands into unsorted_regs and unsorted_offsets; additionally,
7176 order[0] has been set to the lowest numbered register in the
7177 list. Sort the registers into order, and check that the memory
7178 offsets are ascending and adjacent. */
7180 for (i = 1; i < nops; i++)
7182 int j;
7184 order[i] = order[i - 1];
7185 for (j = 0; j < nops; j++)
7186 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7187 && (order[i] == order[i - 1]
7188 || unsorted_regs[j] < unsorted_regs[order[i]]))
7189 order[i] = j;
7191 /* Have we found a suitable register?  If not, one must be used more
7192 than once. */
7193 if (order[i] == order[i - 1])
7194 return 0;
7196 /* Is the memory address adjacent and ascending? */
7197 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7198 return 0;
7201 if (base)
7203 *base = base_reg;
7205 for (i = 0; i < nops; i++)
7206 regs[i] = unsorted_regs[order[i]];
7208 *load_offset = unsorted_offsets[order[0]];
7211 if (unsorted_offsets[order[0]] == 0)
7212 return 1; /* stmia */
7214 if (unsorted_offsets[order[0]] == 4)
7215 return 2; /* stmib */
7217 if (unsorted_offsets[order[nops - 1]] == 0)
7218 return 3; /* stmda */
7220 if (unsorted_offsets[order[nops - 1]] == -4)
7221 return 4; /* stmdb */
7223 return 0;
7226 const char *
7227 emit_stm_seq (rtx *operands, int nops)
7229 int regs[4];
7230 int base_reg;
7231 HOST_WIDE_INT offset;
7232 char buf[100];
7233 int i;
7235 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7237 case 1:
7238 strcpy (buf, "stm%(ia%)\t");
7239 break;
7241 case 2:
7242 strcpy (buf, "stm%(ib%)\t");
7243 break;
7245 case 3:
7246 strcpy (buf, "stm%(da%)\t");
7247 break;
7249 case 4:
7250 strcpy (buf, "stm%(db%)\t");
7251 break;
7253 default:
7254 gcc_unreachable ();
7257 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7258 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7260 for (i = 1; i < nops; i++)
7261 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7262 reg_names[regs[i]]);
7264 strcat (buf, "}\t%@ phole stm");
7266 output_asm_insn (buf, operands);
7267 return "";
7270 /* Routines for use in generating RTL. */
7273 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7274 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7276 HOST_WIDE_INT offset = *offsetp;
7277 int i = 0, j;
7278 rtx result;
7279 int sign = up ? 1 : -1;
7280 rtx mem, addr;
7282 /* XScale has load-store double instructions, but they have stricter
7283 alignment requirements than load-store multiple, so we cannot
7284 use them.
7286 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7287 the pipeline until completion.
7289 NREGS CYCLES
7290 1 3
7291 2 4
7292 3 5
7293 4 6
7295 An ldr instruction takes 1-3 cycles, but does not block the
7296 pipeline.
7298 NREGS CYCLES
7299 1 1-3
7300 2 2-6
7301 3 3-9
7302 4 4-12
7304 Best case ldr will always win. However, the more ldr instructions
7305 we issue, the less likely we are to be able to schedule them well.
7306 Using ldr instructions also increases code size.
7308 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7309 for counts of 3 or 4 regs. */
7310 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7312 rtx seq;
7314 start_sequence ();
7316 for (i = 0; i < count; i++)
7318 addr = plus_constant (from, i * 4 * sign);
7319 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7320 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7321 offset += 4 * sign;
7324 if (write_back)
7326 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7327 *offsetp = offset;
7330 seq = get_insns ();
7331 end_sequence ();
7333 return seq;
7336 result = gen_rtx_PARALLEL (VOIDmode,
7337 rtvec_alloc (count + (write_back ? 1 : 0)));
7338 if (write_back)
7340 XVECEXP (result, 0, 0)
7341 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7342 i = 1;
7343 count++;
7346 for (j = 0; i < count; i++, j++)
7348 addr = plus_constant (from, j * 4 * sign);
7349 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7350 XVECEXP (result, 0, i)
7351 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7352 offset += 4 * sign;
7355 if (write_back)
7356 *offsetp = offset;
7358 return result;
7362 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7363 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7365 HOST_WIDE_INT offset = *offsetp;
7366 int i = 0, j;
7367 rtx result;
7368 int sign = up ? 1 : -1;
7369 rtx mem, addr;
7371 /* See arm_gen_load_multiple for discussion of
7372 the pros/cons of ldm/stm usage for XScale. */
7373 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7375 rtx seq;
7377 start_sequence ();
7379 for (i = 0; i < count; i++)
7381 addr = plus_constant (to, i * 4 * sign);
7382 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7383 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7384 offset += 4 * sign;
7387 if (write_back)
7389 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7390 *offsetp = offset;
7393 seq = get_insns ();
7394 end_sequence ();
7396 return seq;
7399 result = gen_rtx_PARALLEL (VOIDmode,
7400 rtvec_alloc (count + (write_back ? 1 : 0)));
7401 if (write_back)
7403 XVECEXP (result, 0, 0)
7404 = gen_rtx_SET (VOIDmode, to,
7405 plus_constant (to, count * 4 * sign));
7406 i = 1;
7407 count++;
7410 for (j = 0; i < count; i++, j++)
7412 addr = plus_constant (to, j * 4 * sign);
7413 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7414 XVECEXP (result, 0, i)
7415 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7416 offset += 4 * sign;
7419 if (write_back)
7420 *offsetp = offset;
7422 return result;
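/* Expand a block copy for movmemqi.  Added summary (not in the original
   sources): operands[0] and operands[1] are the destination and source
   blocks, operands[2] the byte count (at most 64) and operands[3] the
   alignment, which must be a multiple of 4.  E.g. a 14-byte copy loads
   the four words covering the block, stores three whole words with the
   multiple-store path, and emits the final two bytes from the fourth
   loaded word via the tail code at the end of the function.  */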
7426 arm_gen_movmemqi (rtx *operands)
7428 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7429 HOST_WIDE_INT srcoffset, dstoffset;
7430 int i;
7431 rtx src, dst, srcbase, dstbase;
7432 rtx part_bytes_reg = NULL;
7433 rtx mem;
7435 if (GET_CODE (operands[2]) != CONST_INT
7436 || GET_CODE (operands[3]) != CONST_INT
7437 || INTVAL (operands[2]) > 64
7438 || INTVAL (operands[3]) & 3)
7439 return 0;
7441 dstbase = operands[0];
7442 srcbase = operands[1];
7444 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7445 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7447 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7448 out_words_to_go = INTVAL (operands[2]) / 4;
7449 last_bytes = INTVAL (operands[2]) & 3;
7450 dstoffset = srcoffset = 0;
7452 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7453 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7455 for (i = 0; in_words_to_go >= 2; i+=4)
7457 if (in_words_to_go > 4)
7458 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7459 srcbase, &srcoffset));
7460 else
7461 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7462 FALSE, srcbase, &srcoffset));
7464 if (out_words_to_go)
7466 if (out_words_to_go > 4)
7467 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7468 dstbase, &dstoffset));
7469 else if (out_words_to_go != 1)
7470 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7471 dst, TRUE,
7472 (last_bytes == 0
7473 ? FALSE : TRUE),
7474 dstbase, &dstoffset));
7475 else
7477 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7478 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7479 if (last_bytes != 0)
7481 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7482 dstoffset += 4;
7487 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7488 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7491 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7492 if (out_words_to_go)
7494 rtx sreg;
7496 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7497 sreg = copy_to_reg (mem);
7499 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7500 emit_move_insn (mem, sreg);
7501 in_words_to_go--;
7503 gcc_assert (!in_words_to_go); /* Sanity check */
7506 if (in_words_to_go)
7508 gcc_assert (in_words_to_go > 0);
7510 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7511 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7514 gcc_assert (!last_bytes || part_bytes_reg);
7516 if (BYTES_BIG_ENDIAN && last_bytes)
7518 rtx tmp = gen_reg_rtx (SImode);
7520 /* The bytes we want are in the top end of the word. */
7521 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7522 GEN_INT (8 * (4 - last_bytes))));
7523 part_bytes_reg = tmp;
7525 while (last_bytes)
7527 mem = adjust_automodify_address (dstbase, QImode,
7528 plus_constant (dst, last_bytes - 1),
7529 dstoffset + last_bytes - 1);
7530 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7532 if (--last_bytes)
7534 tmp = gen_reg_rtx (SImode);
7535 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7536 part_bytes_reg = tmp;
7541 else
7543 if (last_bytes > 1)
7545 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7546 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7547 last_bytes -= 2;
7548 if (last_bytes)
7550 rtx tmp = gen_reg_rtx (SImode);
7551 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7552 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7553 part_bytes_reg = tmp;
7554 dstoffset += 2;
7558 if (last_bytes)
7560 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7561 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7565 return 1;
7568 /* Select a dominance comparison mode if possible for a test of the general
7569 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7570 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7571 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7572 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7573 In all cases OP will be either EQ or NE, but we don't need to know which
7574 here. If we are unable to support a dominance comparison we return
7575 CC mode. This will then fail to match for the RTL expressions that
7576 generate this call. */
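/* Added illustration: for (EQ x y) || (LE x y) with DOM_CC_X_OR_Y,
   LE dominates EQ, so CC_DLEmode is returned; for (LT x y) && (GT x y)
   neither condition dominates the other and CCmode is returned, making
   the calling pattern fail to match.  */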
7577 enum machine_mode
7578 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7580 enum rtx_code cond1, cond2;
7581 int swapped = 0;
7583 /* Currently we will probably get the wrong result if the individual
7584 comparisons are not simple. This also ensures that it is safe to
7585 reverse a comparison if necessary. */
7586 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7587 != CCmode)
7588 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7589 != CCmode))
7590 return CCmode;
7592 /* The if_then_else variant of this tests the second condition if the
7593 first passes, but is true if the first fails. Reverse the first
7594 condition to get a true "inclusive-or" expression. */
7595 if (cond_or == DOM_CC_NX_OR_Y)
7596 cond1 = reverse_condition (cond1);
7598 /* If the comparisons are not equal, and one doesn't dominate the other,
7599 then we can't do this. */
7600 if (cond1 != cond2
7601 && !comparison_dominates_p (cond1, cond2)
7602 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7603 return CCmode;
7605 if (swapped)
7607 enum rtx_code temp = cond1;
7608 cond1 = cond2;
7609 cond2 = temp;
7612 switch (cond1)
7614 case EQ:
7615 if (cond_or == DOM_CC_X_AND_Y)
7616 return CC_DEQmode;
7618 switch (cond2)
7620 case EQ: return CC_DEQmode;
7621 case LE: return CC_DLEmode;
7622 case LEU: return CC_DLEUmode;
7623 case GE: return CC_DGEmode;
7624 case GEU: return CC_DGEUmode;
7625 default: gcc_unreachable ();
7628 case LT:
7629 if (cond_or == DOM_CC_X_AND_Y)
7630 return CC_DLTmode;
7632 switch (cond2)
7634 case LT:
7635 return CC_DLTmode;
7636 case LE:
7637 return CC_DLEmode;
7638 case NE:
7639 return CC_DNEmode;
7640 default:
7641 gcc_unreachable ();
7644 case GT:
7645 if (cond_or == DOM_CC_X_AND_Y)
7646 return CC_DGTmode;
7648 switch (cond2)
7650 case GT:
7651 return CC_DGTmode;
7652 case GE:
7653 return CC_DGEmode;
7654 case NE:
7655 return CC_DNEmode;
7656 default:
7657 gcc_unreachable ();
7660 case LTU:
7661 if (cond_or == DOM_CC_X_AND_Y)
7662 return CC_DLTUmode;
7664 switch (cond2)
7666 case LTU:
7667 return CC_DLTUmode;
7668 case LEU:
7669 return CC_DLEUmode;
7670 case NE:
7671 return CC_DNEmode;
7672 default:
7673 gcc_unreachable ();
7676 case GTU:
7677 if (cond_or == DOM_CC_X_AND_Y)
7678 return CC_DGTUmode;
7680 switch (cond2)
7682 case GTU:
7683 return CC_DGTUmode;
7684 case GEU:
7685 return CC_DGEUmode;
7686 case NE:
7687 return CC_DNEmode;
7688 default:
7689 gcc_unreachable ();
7692 /* The remaining cases only occur when both comparisons are the
7693 same. */
7694 case NE:
7695 gcc_assert (cond1 == cond2);
7696 return CC_DNEmode;
7698 case LE:
7699 gcc_assert (cond1 == cond2);
7700 return CC_DLEmode;
7702 case GE:
7703 gcc_assert (cond1 == cond2);
7704 return CC_DGEmode;
7706 case LEU:
7707 gcc_assert (cond1 == cond2);
7708 return CC_DLEUmode;
7710 case GEU:
7711 gcc_assert (cond1 == cond2);
7712 return CC_DGEUmode;
7714 default:
7715 gcc_unreachable ();
7719 enum machine_mode
7720 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7722 /* All floating point compares return CCFP if it is an equality
7723 comparison, and CCFPE otherwise. */
7724 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7726 switch (op)
7728 case EQ:
7729 case NE:
7730 case UNORDERED:
7731 case ORDERED:
7732 case UNLT:
7733 case UNLE:
7734 case UNGT:
7735 case UNGE:
7736 case UNEQ:
7737 case LTGT:
7738 return CCFPmode;
7740 case LT:
7741 case LE:
7742 case GT:
7743 case GE:
7744 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7745 return CCFPmode;
7746 return CCFPEmode;
7748 default:
7749 gcc_unreachable ();
7753 /* A compare with a shifted operand. Because of canonicalization, the
7754 comparison will have to be swapped when we emit the assembler. */
7755 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7756 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7757 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7758 || GET_CODE (x) == ROTATERT))
7759 return CC_SWPmode;
7761 /* This operation is performed swapped, but since we only rely on the Z
7762 flag we don't need an additional mode. */
7763 if (GET_MODE (y) == SImode && REG_P (y)
7764 && GET_CODE (x) == NEG
7765 && (op == EQ || op == NE))
7766 return CC_Zmode;
7768 /* This is a special case that is used by combine to allow a
7769 comparison of a shifted byte load to be split into a zero-extend
7770 followed by a comparison of the shifted integer (only valid for
7771 equalities and unsigned inequalities). */
7772 if (GET_MODE (x) == SImode
7773 && GET_CODE (x) == ASHIFT
7774 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7775 && GET_CODE (XEXP (x, 0)) == SUBREG
7776 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7777 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7778 && (op == EQ || op == NE
7779 || op == GEU || op == GTU || op == LTU || op == LEU)
7780 && GET_CODE (y) == CONST_INT)
7781 return CC_Zmode;
7783 /* A construct for a conditional compare, if the false arm contains
7784 0, then both conditions must be true, otherwise either condition
7785 must be true. Not all conditions are possible, so CCmode is
7786 returned if it can't be done. */
7787 if (GET_CODE (x) == IF_THEN_ELSE
7788 && (XEXP (x, 2) == const0_rtx
7789 || XEXP (x, 2) == const1_rtx)
7790 && COMPARISON_P (XEXP (x, 0))
7791 && COMPARISON_P (XEXP (x, 1)))
7792 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7793 INTVAL (XEXP (x, 2)));
7795 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7796 if (GET_CODE (x) == AND
7797 && COMPARISON_P (XEXP (x, 0))
7798 && COMPARISON_P (XEXP (x, 1)))
7799 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7800 DOM_CC_X_AND_Y);
7802 if (GET_CODE (x) == IOR
7803 && COMPARISON_P (XEXP (x, 0))
7804 && COMPARISON_P (XEXP (x, 1)))
7805 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7806 DOM_CC_X_OR_Y);
7808 /* An operation (on Thumb) where we want to test for a single bit.
7809 This is done by shifting that bit up into the top bit of a
7810 scratch register; we can then branch on the sign bit. */
7811 if (TARGET_THUMB1
7812 && GET_MODE (x) == SImode
7813 && (op == EQ || op == NE)
7814 && GET_CODE (x) == ZERO_EXTRACT
7815 && XEXP (x, 1) == const1_rtx)
7816 return CC_Nmode;
7818 /* An operation that sets the condition codes as a side-effect, the
7819 V flag is not set correctly, so we can only use comparisons where
7820 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7821 instead.) */
7822 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7823 if (GET_MODE (x) == SImode
7824 && y == const0_rtx
7825 && (op == EQ || op == NE || op == LT || op == GE)
7826 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7827 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7828 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7829 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7830 || GET_CODE (x) == LSHIFTRT
7831 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7832 || GET_CODE (x) == ROTATERT
7833 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7834 return CC_NOOVmode;
7836 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7837 return CC_Zmode;
7839 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7840 && GET_CODE (x) == PLUS
7841 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7842 return CC_Cmode;
7844 return CCmode;
7847 /* X and Y are two things to compare using CODE. Emit the compare insn and
7848 return the rtx for register 0 in the proper mode.  FP means this is a
7849 floating-point compare; it does not appear to be needed on the ARM.  */
7851 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7853 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7854 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7856 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7858 return cc_reg;
7861 /* Generate a sequence of insns that will generate the correct return
7862 address mask depending on the physical architecture that the program
7863 is running on. */
7865 arm_gen_return_addr_mask (void)
7867 rtx reg = gen_reg_rtx (Pmode);
7869 emit_insn (gen_return_addr_mask (reg));
7870 return reg;
7873 void
7874 arm_reload_in_hi (rtx *operands)
7876 rtx ref = operands[1];
7877 rtx base, scratch;
7878 HOST_WIDE_INT offset = 0;
7880 if (GET_CODE (ref) == SUBREG)
7882 offset = SUBREG_BYTE (ref);
7883 ref = SUBREG_REG (ref);
7886 if (GET_CODE (ref) == REG)
7888 /* We have a pseudo which has been spilt onto the stack; there
7889 are two cases here: the first where there is a simple
7890 stack-slot replacement and a second where the stack-slot is
7891 out of range, or is used as a subreg. */
7892 if (reg_equiv_mem[REGNO (ref)])
7894 ref = reg_equiv_mem[REGNO (ref)];
7895 base = find_replacement (&XEXP (ref, 0));
7897 else
7898 /* The slot is out of range, or was dressed up in a SUBREG. */
7899 base = reg_equiv_address[REGNO (ref)];
7901 else
7902 base = find_replacement (&XEXP (ref, 0));
7904 /* Handle the case where the address is too complex to be offset by 1. */
7905 if (GET_CODE (base) == MINUS
7906 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7908 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7910 emit_set_insn (base_plus, base);
7911 base = base_plus;
7913 else if (GET_CODE (base) == PLUS)
7915 /* The addend must be CONST_INT, or we would have dealt with it above. */
7916 HOST_WIDE_INT hi, lo;
7918 offset += INTVAL (XEXP (base, 1));
7919 base = XEXP (base, 0);
7921 /* Rework the address into a legal sequence of insns. */
7922 /* Valid range for lo is -4095 -> 4095 */
7923 lo = (offset >= 0
7924 ? (offset & 0xfff)
7925 : -((-offset) & 0xfff));
7927 /* Corner case: if lo is the max offset, then we would be out of range
7928 once we have added the additional 1 below, so bump the msb into the
7929 pre-loading insn(s). */
7930 if (lo == 4095)
7931 lo &= 0x7ff;
7933 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7934 ^ (HOST_WIDE_INT) 0x80000000)
7935 - (HOST_WIDE_INT) 0x80000000);
7937 gcc_assert (hi + lo == offset);
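/* Worked example (added for clarity): offset = 4100 splits into
   lo = 4, hi = 4096; offset = 4095 hits the corner case above, so
   lo becomes 2047 and hi = 2048.  Either way hi + lo == offset, and
   both byte loads below stay within the +/-4095 addressing range.  */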
7939 if (hi != 0)
7941 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7943 /* Get the base address; addsi3 knows how to handle constants
7944 that require more than one insn. */
7945 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7946 base = base_plus;
7947 offset = lo;
7951 /* Operands[2] may overlap operands[0] (though it won't overlap
7952 operands[1]); that's why we asked for a DImode reg -- so we can
7953 use the half that does not overlap. */
7954 if (REGNO (operands[2]) == REGNO (operands[0]))
7955 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7956 else
7957 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7959 emit_insn (gen_zero_extendqisi2 (scratch,
7960 gen_rtx_MEM (QImode,
7961 plus_constant (base,
7962 offset))));
7963 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7964 gen_rtx_MEM (QImode,
7965 plus_constant (base,
7966 offset + 1))));
7967 if (!BYTES_BIG_ENDIAN)
7968 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7969 gen_rtx_IOR (SImode,
7970 gen_rtx_ASHIFT
7971 (SImode,
7972 gen_rtx_SUBREG (SImode, operands[0], 0),
7973 GEN_INT (8)),
7974 scratch));
7975 else
7976 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7977 gen_rtx_IOR (SImode,
7978 gen_rtx_ASHIFT (SImode, scratch,
7979 GEN_INT (8)),
7980 gen_rtx_SUBREG (SImode, operands[0], 0)));
7983 /* Handle storing a half-word to memory during reload by synthesizing as two
7984 byte stores. Take care not to clobber the input values until after we
7985 have moved them somewhere safe. This code assumes that if the DImode
7986 scratch in operands[2] overlaps either the input value or output address
7987 in some way, then that value must die in this insn (we absolutely need
7988 two scratch registers for some corner cases). */
7989 void
7990 arm_reload_out_hi (rtx *operands)
7992 rtx ref = operands[0];
7993 rtx outval = operands[1];
7994 rtx base, scratch;
7995 HOST_WIDE_INT offset = 0;
7997 if (GET_CODE (ref) == SUBREG)
7999 offset = SUBREG_BYTE (ref);
8000 ref = SUBREG_REG (ref);
8003 if (GET_CODE (ref) == REG)
8005 /* We have a pseudo which has been spilt onto the stack; there
8006 are two cases here: the first where there is a simple
8007 stack-slot replacement and a second where the stack-slot is
8008 out of range, or is used as a subreg. */
8009 if (reg_equiv_mem[REGNO (ref)])
8011 ref = reg_equiv_mem[REGNO (ref)];
8012 base = find_replacement (&XEXP (ref, 0));
8014 else
8015 /* The slot is out of range, or was dressed up in a SUBREG. */
8016 base = reg_equiv_address[REGNO (ref)];
8018 else
8019 base = find_replacement (&XEXP (ref, 0));
8021 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8023 /* Handle the case where the address is too complex to be offset by 1. */
8024 if (GET_CODE (base) == MINUS
8025 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8027 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8029 /* Be careful not to destroy OUTVAL. */
8030 if (reg_overlap_mentioned_p (base_plus, outval))
8032 /* Updating base_plus might destroy outval, see if we can
8033 swap the scratch and base_plus. */
8034 if (!reg_overlap_mentioned_p (scratch, outval))
8036 rtx tmp = scratch;
8037 scratch = base_plus;
8038 base_plus = tmp;
8040 else
8042 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8044 /* Be conservative and copy OUTVAL into the scratch now,
8045 this should only be necessary if outval is a subreg
8046 of something larger than a word. */
8047 /* XXX Might this clobber base? I can't see how it can,
8048 since scratch is known to overlap with OUTVAL, and
8049 must be wider than a word. */
8050 emit_insn (gen_movhi (scratch_hi, outval));
8051 outval = scratch_hi;
8055 emit_set_insn (base_plus, base);
8056 base = base_plus;
8058 else if (GET_CODE (base) == PLUS)
8060 /* The addend must be CONST_INT, or we would have dealt with it above. */
8061 HOST_WIDE_INT hi, lo;
8063 offset += INTVAL (XEXP (base, 1));
8064 base = XEXP (base, 0);
8066 /* Rework the address into a legal sequence of insns. */
8067 /* Valid range for lo is -4095 -> 4095 */
8068 lo = (offset >= 0
8069 ? (offset & 0xfff)
8070 : -((-offset) & 0xfff));
8072 /* Corner case: if lo is the max offset, then we would be out of range
8073 once we have added the additional 1 below, so bump the msb into the
8074 pre-loading insn(s). */
8075 if (lo == 4095)
8076 lo &= 0x7ff;
8078 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8079 ^ (HOST_WIDE_INT) 0x80000000)
8080 - (HOST_WIDE_INT) 0x80000000);
8082 gcc_assert (hi + lo == offset);
8084 if (hi != 0)
8086 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8088 /* Be careful not to destroy OUTVAL. */
8089 if (reg_overlap_mentioned_p (base_plus, outval))
8091 /* Updating base_plus might destroy outval, see if we
8092 can swap the scratch and base_plus. */
8093 if (!reg_overlap_mentioned_p (scratch, outval))
8095 rtx tmp = scratch;
8096 scratch = base_plus;
8097 base_plus = tmp;
8099 else
8101 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8103 /* Be conservative and copy outval into scratch now,
8104 this should only be necessary if outval is a
8105 subreg of something larger than a word. */
8106 /* XXX Might this clobber base? I can't see how it
8107 can, since scratch is known to overlap with
8108 outval. */
8109 emit_insn (gen_movhi (scratch_hi, outval));
8110 outval = scratch_hi;
8114 /* Get the base address; addsi3 knows how to handle constants
8115 that require more than one insn. */
8116 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8117 base = base_plus;
8118 offset = lo;
8122 if (BYTES_BIG_ENDIAN)
8124 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8125 plus_constant (base, offset + 1)),
8126 gen_lowpart (QImode, outval)));
8127 emit_insn (gen_lshrsi3 (scratch,
8128 gen_rtx_SUBREG (SImode, outval, 0),
8129 GEN_INT (8)));
8130 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8131 gen_lowpart (QImode, scratch)));
8133 else
8135 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8136 gen_lowpart (QImode, outval)));
8137 emit_insn (gen_lshrsi3 (scratch,
8138 gen_rtx_SUBREG (SImode, outval, 0),
8139 GEN_INT (8)));
8140 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8141 plus_constant (base, offset + 1)),
8142 gen_lowpart (QImode, scratch)));
8146 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8147 (padded to the size of a word) should be passed in a register. */
8149 static bool
8150 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8152 if (TARGET_AAPCS_BASED)
8153 return must_pass_in_stack_var_size (mode, type);
8154 else
8155 return must_pass_in_stack_var_size_or_pad (mode, type);
8159 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8160 Return true if an argument passed on the stack should be padded upwards,
8161 i.e. if the least-significant byte has useful data.
8162 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8163 aggregate types are placed in the lowest memory address. */
8165 bool
8166 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8168 if (!TARGET_AAPCS_BASED)
8169 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8171 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8172 return false;
8174 return true;
8178 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8179 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8180 byte of the register has useful data, and return the opposite if the
8181 most significant byte does.
8182 For AAPCS, small aggregates and small complex types are always padded
8183 upwards. */
8185 bool
8186 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8187 tree type, int first ATTRIBUTE_UNUSED)
8189 if (TARGET_AAPCS_BASED
8190 && BYTES_BIG_ENDIAN
8191 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8192 && int_size_in_bytes (type) <= 4)
8193 return true;
8195 /* Otherwise, use default padding. */
8196 return !BYTES_BIG_ENDIAN;
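/* For example (added note): on a big-endian AAPCS target a 3-byte
   aggregate held in a register is padded upwards (the function above
   returns true), whereas a short integer argument on the stack is
   padded downwards (arm_pad_arg_upward returns false for big-endian
   integral types).  */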
8200 /* Print a symbolic form of X to the debug file, F. */
8201 static void
8202 arm_print_value (FILE *f, rtx x)
8204 switch (GET_CODE (x))
8206 case CONST_INT:
8207 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8208 return;
8210 case CONST_DOUBLE:
8211 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8212 return;
8214 case CONST_VECTOR:
8216 int i;
8218 fprintf (f, "<");
8219 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8221 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8222 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8223 fputc (',', f);
8225 fprintf (f, ">");
8227 return;
8229 case CONST_STRING:
8230 fprintf (f, "\"%s\"", XSTR (x, 0));
8231 return;
8233 case SYMBOL_REF:
8234 fprintf (f, "`%s'", XSTR (x, 0));
8235 return;
8237 case LABEL_REF:
8238 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8239 return;
8241 case CONST:
8242 arm_print_value (f, XEXP (x, 0));
8243 return;
8245 case PLUS:
8246 arm_print_value (f, XEXP (x, 0));
8247 fprintf (f, "+");
8248 arm_print_value (f, XEXP (x, 1));
8249 return;
8251 case PC:
8252 fprintf (f, "pc");
8253 return;
8255 default:
8256 fprintf (f, "????");
8257 return;
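/* Example output (illustrative): (const (plus (symbol_ref "x")
   (const_int 4))) prints as `x'+0x4, while any unhandled code prints
   as "????".  */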
8261 /* Routines for manipulation of the constant pool. */
8263 /* Arm instructions cannot load a large constant directly into a
8264 register; they have to come from a pc relative load. The constant
8265 must therefore be placed in the addressable range of the pc
8266 relative load. Depending on the precise pc relative load
8267 instruction the range is somewhere between 256 bytes and 4k. This
8268 means that we often have to dump a constant inside a function, and
8269 generate code to branch around it.
8271 It is important to minimize this, since the branches will slow
8272 things down and make the code larger.
8274 Normally we can hide the table after an existing unconditional
8275 branch so that there is no interruption of the flow, but in the
8276 worst case the code looks like this:
8278 ldr rn, L1
8280 b L2
8281 align
8282 L1: .long value
8286 ldr rn, L3
8288 b L4
8289 align
8290 L3: .long value
8294 We fix this by performing a scan after scheduling, which notices
8295 which instructions need to have their operands fetched from the
8296 constant table and builds the table.
8298 The algorithm starts by building a table of all the constants that
8299 need fixing up and all the natural barriers in the function (places
8300 where a constant table can be dropped without breaking the flow).
8301 For each fixup we note how far the pc-relative replacement will be
8302 able to reach and the offset of the instruction into the function.
8304 Having built the table we then group the fixes together to form
8305 tables that are as large as possible (subject to addressing
8306 constraints) and emit each table of constants after the last
8307 barrier that is within range of all the instructions in the group.
8308 If a group does not contain a barrier, then we forcibly create one
8309 by inserting a jump instruction into the flow. Once the table has
8310 been inserted, the insns are then modified to reference the
8311 relevant entry in the pool.
8313 Possible enhancements to the algorithm (not implemented) are:
8315 1) For some processors and object formats, there may be benefit in
8316 aligning the pools to the start of cache lines; this alignment
8317 would need to be taken into account when calculating addressability
8318 of a pool. */
8320 /* These typedefs are located at the start of this file, so that
8321 they can be used in the prototypes there. This comment is to
8322 remind readers of that fact so that the following structures
8323 can be understood more easily.
8325 typedef struct minipool_node Mnode;
8326 typedef struct minipool_fixup Mfix; */
8328 struct minipool_node
8330 /* Doubly linked chain of entries. */
8331 Mnode * next;
8332 Mnode * prev;
8333 /* The maximum offset into the code at which this entry can be placed.
8334 While pushing fixes for forward references, all entries are sorted
8335 in order of increasing max_address. */
8336 HOST_WIDE_INT max_address;
8337 /* Similarly for an entry inserted for a backwards ref. */
8338 HOST_WIDE_INT min_address;
8339 /* The number of fixes referencing this entry. This can become zero
8340 if we "unpush" an entry. In this case we ignore the entry when we
8341 come to emit the code. */
8342 int refcount;
8343 /* The offset from the start of the minipool. */
8344 HOST_WIDE_INT offset;
8345 /* The value in the table. */
8346 rtx value;
8347 /* The mode of value. */
8348 enum machine_mode mode;
8349 /* The size of the value. With iWMMXt enabled
8350 sizes > 4 also imply an alignment of 8 bytes. */
8351 int fix_size;
8354 struct minipool_fixup
8356 Mfix * next;
8357 rtx insn;
8358 HOST_WIDE_INT address;
8359 rtx * loc;
8360 enum machine_mode mode;
8361 int fix_size;
8362 rtx value;
8363 Mnode * minipool;
8364 HOST_WIDE_INT forwards;
8365 HOST_WIDE_INT backwards;
8368 /* Fixes less than a word need padding out to a word boundary. */
8369 #define MINIPOOL_FIX_SIZE(mode) \
8370 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
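/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up from 2),
   while MINIPOOL_FIX_SIZE (DImode) is 8.  */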
8372 static Mnode * minipool_vector_head;
8373 static Mnode * minipool_vector_tail;
8374 static rtx minipool_vector_label;
8375 static int minipool_pad;
8377 /* The linked list of all minipool fixes required for this function. */
8378 Mfix * minipool_fix_head;
8379 Mfix * minipool_fix_tail;
8380 /* The fix entry for the current minipool, once it has been placed. */
8381 Mfix * minipool_barrier;
8383 /* Determines if INSN is the start of a jump table. Returns the end
8384 of the TABLE or NULL_RTX. */
8385 static rtx
8386 is_jump_table (rtx insn)
8388 rtx table;
8390 if (GET_CODE (insn) == JUMP_INSN
8391 && JUMP_LABEL (insn) != NULL
8392 && ((table = next_real_insn (JUMP_LABEL (insn)))
8393 == next_real_insn (insn))
8394 && table != NULL
8395 && GET_CODE (table) == JUMP_INSN
8396 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8397 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8398 return table;
8400 return NULL_RTX;
8403 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8404 #define JUMP_TABLES_IN_TEXT_SECTION 0
8405 #endif
8407 static HOST_WIDE_INT
8408 get_jump_table_size (rtx insn)
8410 /* ADDR_VECs only take room if read-only data goes into the text
8411 section. */
8412 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8414 rtx body = PATTERN (insn);
8415 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8416 HOST_WIDE_INT size;
8417 HOST_WIDE_INT modesize;
8419 modesize = GET_MODE_SIZE (GET_MODE (body));
8420 size = modesize * XVECLEN (body, elt);
8421 switch (modesize)
8423 case 1:
8424 /* Round up size of TBB table to a halfword boundary. */
8425 size = (size + 1) & ~(HOST_WIDE_INT)1;
8426 break;
8427 case 2:
8428 /* No padding necessary for TBH. */
8429 break;
8430 case 4:
8431 /* Add two bytes for alignment on Thumb. */
8432 if (TARGET_THUMB)
8433 size += 2;
8434 break;
8435 default:
8436 gcc_unreachable ();
8438 return size;
8441 return 0;
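/* Illustrative sizing: with the table in the text section, a QImode
   (TBB-style) ADDR_DIFF_VEC with 5 entries occupies 5 bytes, rounded
   up to 6 for the halfword boundary; an SImode table with 5 entries
   occupies 20 bytes, plus 2 alignment bytes on Thumb.  */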
8444 /* Move a minipool fix MP from its current location to before MAX_MP.
8445 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8446 constraints may need updating. */
8447 static Mnode *
8448 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8449 HOST_WIDE_INT max_address)
8451 /* The code below assumes these are different. */
8452 gcc_assert (mp != max_mp);
8454 if (max_mp == NULL)
8456 if (max_address < mp->max_address)
8457 mp->max_address = max_address;
8459 else
8461 if (max_address > max_mp->max_address - mp->fix_size)
8462 mp->max_address = max_mp->max_address - mp->fix_size;
8463 else
8464 mp->max_address = max_address;
8466 /* Unlink MP from its current position. Since max_mp is non-null,
8467 mp->prev must be non-null. */
8468 mp->prev->next = mp->next;
8469 if (mp->next != NULL)
8470 mp->next->prev = mp->prev;
8471 else
8472 minipool_vector_tail = mp->prev;
8474 /* Re-insert it before MAX_MP. */
8475 mp->next = max_mp;
8476 mp->prev = max_mp->prev;
8477 max_mp->prev = mp;
8479 if (mp->prev != NULL)
8480 mp->prev->next = mp;
8481 else
8482 minipool_vector_head = mp;
8485 /* Save the new entry. */
8486 max_mp = mp;
8488 /* Scan over the preceding entries and adjust their addresses as
8489 required. */
8490 while (mp->prev != NULL
8491 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8493 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8494 mp = mp->prev;
8497 return max_mp;
8500 /* Add a constant to the minipool for a forward reference. Returns the
8501 node added or NULL if the constant will not fit in this pool. */
8502 static Mnode *
8503 add_minipool_forward_ref (Mfix *fix)
8505 /* If set, max_mp is the first pool_entry that has a lower
8506 constraint than the one we are trying to add. */
8507 Mnode * max_mp = NULL;
8508 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8509 Mnode * mp;
8511 /* If the minipool starts before the end of FIX->INSN then this FIX
8512 cannot be placed into the current pool. Furthermore, adding the
8513 new constant pool entry may cause the pool to start FIX_SIZE bytes
8514 earlier. */
8515 if (minipool_vector_head &&
8516 (fix->address + get_attr_length (fix->insn)
8517 >= minipool_vector_head->max_address - fix->fix_size))
8518 return NULL;
8520 /* Scan the pool to see if a constant with the same value has
8521 already been added. While we are doing this, also note the
8522 location where we must insert the constant if it doesn't already
8523 exist. */
8524 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8526 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8527 && fix->mode == mp->mode
8528 && (GET_CODE (fix->value) != CODE_LABEL
8529 || (CODE_LABEL_NUMBER (fix->value)
8530 == CODE_LABEL_NUMBER (mp->value)))
8531 && rtx_equal_p (fix->value, mp->value))
8533 /* More than one fix references this entry. */
8534 mp->refcount++;
8535 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8538 /* Note the insertion point if necessary. */
8539 if (max_mp == NULL
8540 && mp->max_address > max_address)
8541 max_mp = mp;
8543 /* If we are inserting an 8-byte aligned quantity and
8544 we have not already found an insertion point, then
8545 make sure that all such 8-byte aligned quantities are
8546 placed at the start of the pool. */
8547 if (ARM_DOUBLEWORD_ALIGN
8548 && max_mp == NULL
8549 && fix->fix_size >= 8
8550 && mp->fix_size < 8)
8552 max_mp = mp;
8553 max_address = mp->max_address;
8557 /* The value is not currently in the minipool, so we need to create
8558 a new entry for it. If MAX_MP is NULL, the entry will be put on
8559 the end of the list since the placement is less constrained than
8560 any existing entry. Otherwise, we insert the new fix before
8561 MAX_MP and, if necessary, adjust the constraints on the other
8562 entries. */
8563 mp = XNEW (Mnode);
8564 mp->fix_size = fix->fix_size;
8565 mp->mode = fix->mode;
8566 mp->value = fix->value;
8567 mp->refcount = 1;
8568 /* Not yet required for a backwards ref. */
8569 mp->min_address = -65536;
8571 if (max_mp == NULL)
8573 mp->max_address = max_address;
8574 mp->next = NULL;
8575 mp->prev = minipool_vector_tail;
8577 if (mp->prev == NULL)
8579 minipool_vector_head = mp;
8580 minipool_vector_label = gen_label_rtx ();
8582 else
8583 mp->prev->next = mp;
8585 minipool_vector_tail = mp;
8587 else
8589 if (max_address > max_mp->max_address - mp->fix_size)
8590 mp->max_address = max_mp->max_address - mp->fix_size;
8591 else
8592 mp->max_address = max_address;
8594 mp->next = max_mp;
8595 mp->prev = max_mp->prev;
8596 max_mp->prev = mp;
8597 if (mp->prev != NULL)
8598 mp->prev->next = mp;
8599 else
8600 minipool_vector_head = mp;
8603 /* Save the new entry. */
8604 max_mp = mp;
8606 /* Scan over the preceding entries and adjust their addresses as
8607 required. */
8608 while (mp->prev != NULL
8609 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8611 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8612 mp = mp->prev;
8615 return max_mp;
8618 static Mnode *
8619 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8620 HOST_WIDE_INT min_address)
8622 HOST_WIDE_INT offset;
8624 /* The code below assumes these are different. */
8625 gcc_assert (mp != min_mp);
8627 if (min_mp == NULL)
8629 if (min_address > mp->min_address)
8630 mp->min_address = min_address;
8632 else
8634 /* We will adjust this below if it is too loose. */
8635 mp->min_address = min_address;
8637 /* Unlink MP from its current position. Since min_mp is non-null,
8638 mp->next must be non-null. */
8639 mp->next->prev = mp->prev;
8640 if (mp->prev != NULL)
8641 mp->prev->next = mp->next;
8642 else
8643 minipool_vector_head = mp->next;
8645 /* Reinsert it after MIN_MP. */
8646 mp->prev = min_mp;
8647 mp->next = min_mp->next;
8648 min_mp->next = mp;
8649 if (mp->next != NULL)
8650 mp->next->prev = mp;
8651 else
8652 minipool_vector_tail = mp;
8655 min_mp = mp;
8657 offset = 0;
8658 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8660 mp->offset = offset;
8661 if (mp->refcount > 0)
8662 offset += mp->fix_size;
8664 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8665 mp->next->min_address = mp->min_address + mp->fix_size;
8668 return min_mp;
8671 /* Add a constant to the minipool for a backward reference. Returns the
8672 node added or NULL if the constant will not fit in this pool.
8674 Note that the code for insertion for a backwards reference can be
8675 somewhat confusing because the calculated offsets for each fix do
8676 not take into account the size of the pool (which is still under
8677 construction). */
8678 static Mnode *
8679 add_minipool_backward_ref (Mfix *fix)
8681 /* If set, min_mp is the last pool_entry that has a lower constraint
8682 than the one we are trying to add. */
8683 Mnode *min_mp = NULL;
8684 /* This can be negative, since it is only a constraint. */
8685 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8686 Mnode *mp;
8688 /* If we can't reach the current pool from this insn, or if we can't
8689 insert this entry at the end of the pool without pushing other
8690 fixes out of range, then we don't try. This ensures that we
8691 can't fail later on. */
8692 if (min_address >= minipool_barrier->address
8693 || (minipool_vector_tail->min_address + fix->fix_size
8694 >= minipool_barrier->address))
8695 return NULL;
8697 /* Scan the pool to see if a constant with the same value has
8698 already been added. While we are doing this, also note the
8699 location where we must insert the constant if it doesn't already
8700 exist. */
8701 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8703 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8704 && fix->mode == mp->mode
8705 && (GET_CODE (fix->value) != CODE_LABEL
8706 || (CODE_LABEL_NUMBER (fix->value)
8707 == CODE_LABEL_NUMBER (mp->value)))
8708 && rtx_equal_p (fix->value, mp->value)
8709 /* Check that there is enough slack to move this entry to the
8710 end of the table (this is conservative). */
8711 && (mp->max_address
8712 > (minipool_barrier->address
8713 + minipool_vector_tail->offset
8714 + minipool_vector_tail->fix_size)))
8716 mp->refcount++;
8717 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8720 if (min_mp != NULL)
8721 mp->min_address += fix->fix_size;
8722 else
8724 /* Note the insertion point if necessary. */
8725 if (mp->min_address < min_address)
8727 /* For now, we do not allow the insertion of nodes requiring 8-byte
8728 alignment anywhere but at the start of the pool. */
8729 if (ARM_DOUBLEWORD_ALIGN
8730 && fix->fix_size >= 8 && mp->fix_size < 8)
8731 return NULL;
8732 else
8733 min_mp = mp;
8735 else if (mp->max_address
8736 < minipool_barrier->address + mp->offset + fix->fix_size)
8738 /* Inserting before this entry would push the fix beyond
8739 its maximum address (which can happen if we have
8740 re-located a forwards fix); force the new fix to come
8741 after it. */
8742 min_mp = mp;
8743 min_address = mp->min_address + fix->fix_size;
8745 /* If we are inserting an 8-byte aligned quantity and
8746 we have not already found an insertion point, then
8747 make sure that all such 8-byte aligned quantities are
8748 placed at the start of the pool. */
8749 else if (ARM_DOUBLEWORD_ALIGN
8750 && min_mp == NULL
8751 && fix->fix_size >= 8
8752 && mp->fix_size < 8)
8754 min_mp = mp;
8755 min_address = mp->min_address + fix->fix_size;
8760 /* We need to create a new entry. */
8761 mp = XNEW (Mnode);
8762 mp->fix_size = fix->fix_size;
8763 mp->mode = fix->mode;
8764 mp->value = fix->value;
8765 mp->refcount = 1;
8766 mp->max_address = minipool_barrier->address + 65536;
8768 mp->min_address = min_address;
8770 if (min_mp == NULL)
8772 mp->prev = NULL;
8773 mp->next = minipool_vector_head;
8775 if (mp->next == NULL)
8777 minipool_vector_tail = mp;
8778 minipool_vector_label = gen_label_rtx ();
8780 else
8781 mp->next->prev = mp;
8783 minipool_vector_head = mp;
8785 else
8787 mp->next = min_mp->next;
8788 mp->prev = min_mp;
8789 min_mp->next = mp;
8791 if (mp->next != NULL)
8792 mp->next->prev = mp;
8793 else
8794 minipool_vector_tail = mp;
8797 /* Save the new entry. */
8798 min_mp = mp;
8800 if (mp->prev)
8801 mp = mp->prev;
8802 else
8803 mp->offset = 0;
8805 /* Scan over the following entries and adjust their offsets. */
8806 while (mp->next != NULL)
8808 if (mp->next->min_address < mp->min_address + mp->fix_size)
8809 mp->next->min_address = mp->min_address + mp->fix_size;
8811 if (mp->refcount)
8812 mp->next->offset = mp->offset + mp->fix_size;
8813 else
8814 mp->next->offset = mp->offset;
8816 mp = mp->next;
8819 return min_mp;
8822 static void
8823 assign_minipool_offsets (Mfix *barrier)
8825 HOST_WIDE_INT offset = 0;
8826 Mnode *mp;
8828 minipool_barrier = barrier;
8830 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8832 mp->offset = offset;
8834 if (mp->refcount > 0)
8835 offset += mp->fix_size;
8839 /* Output the literal table. */
8840 static void
8841 dump_minipool (rtx scan)
8843 Mnode * mp;
8844 Mnode * nmp;
8845 int align64 = 0;
8847 if (ARM_DOUBLEWORD_ALIGN)
8848 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8849 if (mp->refcount > 0 && mp->fix_size >= 8)
8851 align64 = 1;
8852 break;
8855 if (dump_file)
8856 fprintf (dump_file,
8857 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8858 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8860 scan = emit_label_after (gen_label_rtx (), scan);
8861 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8862 scan = emit_label_after (minipool_vector_label, scan);
8864 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8866 if (mp->refcount > 0)
8868 if (dump_file)
8870 fprintf (dump_file,
8871 ";; Offset %u, min %ld, max %ld ",
8872 (unsigned) mp->offset, (unsigned long) mp->min_address,
8873 (unsigned long) mp->max_address);
8874 arm_print_value (dump_file, mp->value);
8875 fputc ('\n', dump_file);
8878 switch (mp->fix_size)
8880 #ifdef HAVE_consttable_1
8881 case 1:
8882 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8883 break;
8885 #endif
8886 #ifdef HAVE_consttable_2
8887 case 2:
8888 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8889 break;
8891 #endif
8892 #ifdef HAVE_consttable_4
8893 case 4:
8894 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8895 break;
8897 #endif
8898 #ifdef HAVE_consttable_8
8899 case 8:
8900 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8901 break;
8903 #endif
8904 #ifdef HAVE_consttable_16
8905 case 16:
8906 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8907 break;
8909 #endif
8910 default:
8911 gcc_unreachable ();
8915 nmp = mp->next;
8916 free (mp);
8919 minipool_vector_head = minipool_vector_tail = NULL;
8920 scan = emit_insn_after (gen_consttable_end (), scan);
8921 scan = emit_barrier_after (scan);
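/* The emitted pool therefore looks roughly like this (an illustrative
   sketch, not literal assembler output):

       <label>                 ; fresh label from gen_label_rtx
       .align 2                ; or 8-byte alignment when align64 is set
   <minipool_vector_label>:
       .word <constant>        ; one consttable_N entry per live node
       ...                     ; then the consttable_end marker and a
                               ; barrier  */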
8924 /* Return the cost of forcibly inserting a barrier after INSN. */
8925 static int
8926 arm_barrier_cost (rtx insn)
8928 /* Basing the location of the pool on the loop depth is preferable,
8929 but at the moment, the basic block information seems to be
8930 corrupt by this stage of the compilation. */
8931 int base_cost = 50;
8932 rtx next = next_nonnote_insn (insn);
8934 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8935 base_cost -= 20;
8937 switch (GET_CODE (insn))
8939 case CODE_LABEL:
8940 /* It will always be better to place the table before the label, rather
8941 than after it. */
8942 return 50;
8944 case INSN:
8945 case CALL_INSN:
8946 return base_cost;
8948 case JUMP_INSN:
8949 return base_cost - 10;
8951 default:
8952 return base_cost + 10;
8956 /* Find the best place in the insn stream in the range
8957 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8958 Create the barrier by inserting a jump and add a new fix entry for
8959 it. */
8960 static Mfix *
8961 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8963 HOST_WIDE_INT count = 0;
8964 rtx barrier;
8965 rtx from = fix->insn;
8966 /* The instruction after which we will insert the jump. */
8967 rtx selected = NULL;
8968 int selected_cost;
8969 /* The address at which the jump instruction will be placed. */
8970 HOST_WIDE_INT selected_address;
8971 Mfix * new_fix;
8972 HOST_WIDE_INT max_count = max_address - fix->address;
8973 rtx label = gen_label_rtx ();
8975 selected_cost = arm_barrier_cost (from);
8976 selected_address = fix->address;
8978 while (from && count < max_count)
8980 rtx tmp;
8981 int new_cost;
8983 /* This code shouldn't have been called if there was a natural barrier
8984 within range. */
8985 gcc_assert (GET_CODE (from) != BARRIER);
8987 /* Count the length of this insn. */
8988 count += get_attr_length (from);
8990 /* If there is a jump table, add its length. */
8991 tmp = is_jump_table (from);
8992 if (tmp != NULL)
8994 count += get_jump_table_size (tmp);
8996 /* Jump tables aren't in a basic block, so base the cost on
8997 the dispatch insn. If we select this location, we will
8998 still put the pool after the table. */
8999 new_cost = arm_barrier_cost (from);
9001 if (count < max_count
9002 && (!selected || new_cost <= selected_cost))
9004 selected = tmp;
9005 selected_cost = new_cost;
9006 selected_address = fix->address + count;
9009 /* Continue after the dispatch table. */
9010 from = NEXT_INSN (tmp);
9011 continue;
9014 new_cost = arm_barrier_cost (from);
9016 if (count < max_count
9017 && (!selected || new_cost <= selected_cost))
9019 selected = from;
9020 selected_cost = new_cost;
9021 selected_address = fix->address + count;
9024 from = NEXT_INSN (from);
9027 /* Make sure that we found a place to insert the jump. */
9028 gcc_assert (selected);
9030 /* Create a new JUMP_INSN that branches around a barrier. */
9031 from = emit_jump_insn_after (gen_jump (label), selected);
9032 JUMP_LABEL (from) = label;
9033 barrier = emit_barrier_after (from);
9034 emit_label_after (label, barrier);
9036 /* Create a minipool barrier entry for the new barrier. */
9037 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9038 new_fix->insn = barrier;
9039 new_fix->address = selected_address;
9040 new_fix->next = fix->next;
9041 fix->next = new_fix;
9043 return new_fix;
9046 /* Record that there is a natural barrier in the insn stream at
9047 ADDRESS. */
9048 static void
9049 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9051 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9053 fix->insn = insn;
9054 fix->address = address;
9056 fix->next = NULL;
9057 if (minipool_fix_head != NULL)
9058 minipool_fix_tail->next = fix;
9059 else
9060 minipool_fix_head = fix;
9062 minipool_fix_tail = fix;
9065 /* Record INSN, which will need fixing up to load a value from the
9066 minipool. ADDRESS is the offset of the insn since the start of the
9067 function; LOC is a pointer to the part of the insn which requires
9068 fixing; VALUE is the constant that must be loaded, which is of type
9069 MODE. */
9070 static void
9071 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9072 enum machine_mode mode, rtx value)
9074 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9076 fix->insn = insn;
9077 fix->address = address;
9078 fix->loc = loc;
9079 fix->mode = mode;
9080 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9081 fix->value = value;
9082 fix->forwards = get_attr_pool_range (insn);
9083 fix->backwards = get_attr_neg_pool_range (insn);
9084 fix->minipool = NULL;
9086 /* If an insn doesn't have a range defined for it, then it isn't
9087 expecting to be reworked by this code. Better to stop now than
9088 to generate duff assembly code. */
9089 gcc_assert (fix->forwards || fix->backwards);
9091 /* If an entry requires 8-byte alignment then assume all constant pools
9092 require 4 bytes of padding. Trying to do this later on a per-pool
9093 basis is awkward because existing pool entries have to be modified. */
9094 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9095 minipool_pad = 4;
9097 if (dump_file)
9099 fprintf (dump_file,
9100 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9101 GET_MODE_NAME (mode),
9102 INSN_UID (insn), (unsigned long) address,
9103 -1 * (long)fix->backwards, (long)fix->forwards);
9104 arm_print_value (dump_file, fix->value);
9105 fprintf (dump_file, "\n");
9108 /* Add it to the chain of fixes. */
9109 fix->next = NULL;
9111 if (minipool_fix_head != NULL)
9112 minipool_fix_tail->next = fix;
9113 else
9114 minipool_fix_head = fix;
9116 minipool_fix_tail = fix;
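/* Added note: FORWARDS and BACKWARDS come from the pool_range and
   neg_pool_range insn attributes in arm.md.  As described above, a
   pc-relative load reaches somewhere between 256 bytes and 4k, so a
   fix at address 1000 with a 4096-byte forward range must have its
   constant placed before address 5096 (less any minipool padding).  */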
9119 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9120 Returns the number of insns needed, or 99 if we don't know how to
9121 do it. */
9122 int
9123 arm_const_double_inline_cost (rtx val)
9125 rtx lowpart, highpart;
9126 enum machine_mode mode;
9128 mode = GET_MODE (val);
9130 if (mode == VOIDmode)
9131 mode = DImode;
9133 gcc_assert (GET_MODE_SIZE (mode) == 8);
9135 lowpart = gen_lowpart (SImode, val);
9136 highpart = gen_highpart_mode (SImode, mode, val);
9138 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9139 gcc_assert (GET_CODE (highpart) == CONST_INT);
9141 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9142 NULL_RTX, NULL_RTX, 0, 0)
9143 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9144 NULL_RTX, NULL_RTX, 0, 0));
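/* Worked example (added for clarity): for the DImode constant
   0x0000000100000001 both 32-bit halves are 1, each synthesizable
   with a single mov, so the function returns 1 + 1 == 2.  */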
9147 /* Return true if it is worthwhile to split a 64-bit constant into two
9148 32-bit operations. This is the case if optimizing for size, or
9149 if we have load delay slots, or if one 32-bit part can be done with
9150 a single data operation. */
9151 bool
9152 arm_const_double_by_parts (rtx val)
9154 enum machine_mode mode = GET_MODE (val);
9155 rtx part;
9157 if (optimize_size || arm_ld_sched)
9158 return true;
9160 if (mode == VOIDmode)
9161 mode = DImode;
9163 part = gen_highpart_mode (SImode, mode, val);
9165 gcc_assert (GET_CODE (part) == CONST_INT);
9167 if (const_ok_for_arm (INTVAL (part))
9168 || const_ok_for_arm (~INTVAL (part)))
9169 return true;
9171 part = gen_lowpart (SImode, val);
9173 gcc_assert (GET_CODE (part) == CONST_INT);
9175 if (const_ok_for_arm (INTVAL (part))
9176 || const_ok_for_arm (~INTVAL (part)))
9177 return true;
9179 return false;
9182 /* Scan INSN and note any of its operands that need fixing.
9183 If DO_PUSHES is false we do not actually push any of the fixups
9184 needed. The function returns TRUE if any fixups were needed/pushed.
9185 This is used by arm_memory_load_p() which needs to know about loads
9186 of constants that will be converted into minipool loads. */
9187 static bool
9188 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9190 bool result = false;
9191 int opno;
9193 extract_insn (insn);
9195 if (!constrain_operands (1))
9196 fatal_insn_not_found (insn);
9198 if (recog_data.n_alternatives == 0)
9199 return false;
9201 /* Fill in recog_op_alt with information about the constraints of
9202 this insn. */
9203 preprocess_constraints ();
9205 for (opno = 0; opno < recog_data.n_operands; opno++)
9207 /* Things we need to fix can only occur in inputs. */
9208 if (recog_data.operand_type[opno] != OP_IN)
9209 continue;
9211 /* If this alternative is a memory reference, then any mention
9212 of constants in this alternative is really to fool reload
9213 into allowing us to accept one there. We need to fix them up
9214 now so that we output the right code. */
9215 if (recog_op_alt[opno][which_alternative].memory_ok)
9217 rtx op = recog_data.operand[opno];
9219 if (CONSTANT_P (op))
9221 if (do_pushes)
9222 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9223 recog_data.operand_mode[opno], op);
9224 result = true;
9226 else if (GET_CODE (op) == MEM
9227 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9228 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9230 if (do_pushes)
9232 rtx cop = avoid_constant_pool_reference (op);
9234 /* Casting the address of something to a mode narrower
9235 than a word can cause avoid_constant_pool_reference()
9236 to return the pool reference itself. That's no good to
9237 us here. Let's just hope that we can use the
9238 constant pool value directly. */
9239 if (op == cop)
9240 cop = get_pool_constant (XEXP (op, 0));
9242 push_minipool_fix (insn, address,
9243 recog_data.operand_loc[opno],
9244 recog_data.operand_mode[opno], cop);
9247 result = true;
9252 return result;
9255 /* GCC puts the pool in the wrong place for ARM, since we can only
9256 load addresses a limited distance around the pc. We do some
9257 special munging to move the constant pool values to the correct
9258 point in the code. */
9259 static void
9260 arm_reorg (void)
9262 rtx insn;
9263 HOST_WIDE_INT address = 0;
9264 Mfix * fix;
9266 minipool_fix_head = minipool_fix_tail = NULL;
9268 /* The first insn must always be a note, or the code below won't
9269 scan it properly. */
9270 insn = get_insns ();
9271 gcc_assert (GET_CODE (insn) == NOTE);
9272 minipool_pad = 0;
9274 /* Scan all the insns and record the operands that will need fixing. */
9275 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9277 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9278 && (arm_cirrus_insn_p (insn)
9279 || GET_CODE (insn) == JUMP_INSN
9280 || arm_memory_load_p (insn)))
9281 cirrus_reorg (insn);
9283 if (GET_CODE (insn) == BARRIER)
9284 push_minipool_barrier (insn, address);
9285 else if (INSN_P (insn))
9287 rtx table;
9289 note_invalid_constants (insn, address, true);
9290 address += get_attr_length (insn);
9292 /* If the insn is a vector jump, add the size of the table
9293 and skip the table. */
9294 if ((table = is_jump_table (insn)) != NULL)
9296 address += get_jump_table_size (table);
9297 insn = table;
9302 fix = minipool_fix_head;
9304 /* Now scan the fixups and perform the required changes. */
9305 while (fix)
9307 Mfix * ftmp;
9308 Mfix * fdel;
9309 Mfix * last_added_fix;
9310 Mfix * last_barrier = NULL;
9311 Mfix * this_fix;
9313 /* Skip any further barriers before the next fix. */
9314 while (fix && GET_CODE (fix->insn) == BARRIER)
9315 fix = fix->next;
9317 /* No more fixes. */
9318 if (fix == NULL)
9319 break;
9321 last_added_fix = NULL;
9323 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9325 if (GET_CODE (ftmp->insn) == BARRIER)
9327 if (ftmp->address >= minipool_vector_head->max_address)
9328 break;
9330 last_barrier = ftmp;
9332 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9333 break;
9335 last_added_fix = ftmp; /* Keep track of the last fix added. */
9338 /* If we found a barrier, drop back to that; any fixes that we
9339 could have reached but come after the barrier will now go in
9340 the next mini-pool. */
9341 if (last_barrier != NULL)
9343 /* Reduce the refcount for those fixes that won't go into this
9344 pool after all. */
9345 for (fdel = last_barrier->next;
9346 fdel && fdel != ftmp;
9347 fdel = fdel->next)
9349 fdel->minipool->refcount--;
9350 fdel->minipool = NULL;
9353 ftmp = last_barrier;
9355 else
9357 /* ftmp is the first fix that we can't fit into this pool and
9358 there are no natural barriers that we could use. Insert a
9359 new barrier in the code somewhere between the previous
9360 fix and this one, and arrange to jump around it. */
9361 HOST_WIDE_INT max_address;
9363 /* The last item on the list of fixes must be a barrier, so
9364 we can never run off the end of the list of fixes without
9365 last_barrier being set. */
9366 gcc_assert (ftmp);
9368 max_address = minipool_vector_head->max_address;
9369 /* Check that there isn't another fix that is in range that
9370 we couldn't fit into this pool because the pool was
9371 already too large: we need to put the pool before such an
9372 instruction. The pool itself may come just after the
9373 fix because create_fix_barrier also allows space for a
9374 jump instruction. */
9375 if (ftmp->address < max_address)
9376 max_address = ftmp->address + 1;
9378 last_barrier = create_fix_barrier (last_added_fix, max_address);
9381 assign_minipool_offsets (last_barrier);
9383 while (ftmp)
9385 if (GET_CODE (ftmp->insn) != BARRIER
9386 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9387 == NULL))
9388 break;
9390 ftmp = ftmp->next;
9393 /* Scan over the fixes we have identified for this pool, fixing them
9394 up and adding the constants to the pool itself. */
9395 for (this_fix = fix; this_fix && ftmp != this_fix;
9396 this_fix = this_fix->next)
9397 if (GET_CODE (this_fix->insn) != BARRIER)
9399 rtx addr
9400 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9401 minipool_vector_label),
9402 this_fix->minipool->offset);
9403 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9406 dump_minipool (last_barrier->insn);
9407 fix = ftmp;
9410 /* From now on we must synthesize any constants that we can't handle
9411 directly. This can happen if the RTL gets split during final
9412 instruction generation. */
9413 after_arm_reorg = 1;
9415 /* Free the minipool memory. */
9416 obstack_free (&minipool_obstack, minipool_startobj);
9419 /* Routines to output assembly language. */
9421 /* If the rtx is the correct value then return the string of the number.
9422 In this way we can ensure that valid double constants are generated even
9423 when cross compiling. */
9424 const char *
9425 fp_immediate_constant (rtx x)
9427 REAL_VALUE_TYPE r;
9428 int i;
9430 if (!fp_consts_inited)
9431 init_fp_table ();
9433 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9434 for (i = 0; i < 8; i++)
9435 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9436 return strings_fp[i];
9438 gcc_unreachable ();
9441 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9442 static const char *
9443 fp_const_from_val (REAL_VALUE_TYPE *r)
9445 int i;
9447 if (!fp_consts_inited)
9448 init_fp_table ();
9450 for (i = 0; i < 8; i++)
9451 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9452 return strings_fp[i];
9454 gcc_unreachable ();
9457 /* Output the operands of a LDM/STM instruction to STREAM.
9458 MASK is the ARM register set mask of which only bits 0-15 are important.
9459 REG is the base register, either the frame pointer or the stack pointer.
9460 INSTR is the possibly suffixed load or store instruction.
9461 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9463 static void
9464 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9465 unsigned long mask, int rfe)
9467 unsigned i;
9468 bool not_first = FALSE;
9470 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9471 fputc ('\t', stream);
9472 asm_fprintf (stream, instr, reg);
9473 fputc ('{', stream);
9475 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9476 if (mask & (1 << i))
9478 if (not_first)
9479 fprintf (stream, ", ");
9481 asm_fprintf (stream, "%r", i);
9482 not_first = TRUE;
9485 if (rfe)
9486 fprintf (stream, "}^\n");
9487 else
9488 fprintf (stream, "}\n");
9492 /* Output a FLDMD instruction to STREAM.
9493 BASE is the register containing the address.
9494 REG and COUNT specify the register range.
9495 Extra registers may be added to avoid hardware bugs.
9497 We output FLDMD even for ARMv5 VFP implementations. Although
9498 FLDMD is technically not supported until ARMv6, it is believed
9499 that all VFP implementations support its use in this context. */
9501 static void
9502 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9504 int i;
9506 /* Workaround ARM10 VFPr1 bug. */
9507 if (count == 2 && !arm_arch6)
9509 if (reg == 15)
9510 reg--;
9511 count++;
9514 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9515 load into multiple parts if we have to handle more than 16 registers. */
9516 if (count > 16)
9518 vfp_output_fldmd (stream, base, reg, 16);
9519 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9520 return;
9523 fputc ('\t', stream);
9524 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9526 for (i = reg; i < reg + count; i++)
9528 if (i > reg)
9529 fputs (", ", stream);
9530 asm_fprintf (stream, "d%d", i);
9532 fputs ("}\n", stream);
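/* For illustration (assuming arm_arch6, so the workaround above does
   not trigger): vfp_output_fldmd (stream, SP_REGNUM, 4, 2) emits

       fldmfdd sp!, {d4, d5}  */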
9537 /* Output the assembly for a store multiple. */
9539 const char *
9540 vfp_output_fstmd (rtx * operands)
9542 char pattern[100];
9543 int p;
9544 int base;
9545 int i;
9547 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9548 p = strlen (pattern);
9550 gcc_assert (GET_CODE (operands[1]) == REG);
9552 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9553 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9555 p += sprintf (&pattern[p], ", d%d", base + i);
9557 strcpy (&pattern[p], "}");
9559 output_asm_insn (pattern, operands);
9560 return "";
9564 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
9565 number of bytes pushed. */
9567 static int
9568 vfp_emit_fstmd (int base_reg, int count)
9570 rtx par;
9571 rtx dwarf;
9572 rtx tmp, reg;
9573 int i;
9575 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9576 register pairs are stored by a store multiple insn. We avoid this
9577 by pushing an extra pair. */
9578 if (count == 2 && !arm_arch6)
9580 if (base_reg == LAST_VFP_REGNUM - 3)
9581 base_reg -= 2;
9582 count++;
9585 /* FSTMD may not store more than 16 doubleword registers at once. Split
9586 larger stores into multiple parts (up to a maximum of two, in
9587 practice). */
9588 if (count > 16)
9590 int saved;
9591 /* NOTE: base_reg is an internal register number, so each D register
9592 counts as 2. */
9593 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9594 saved += vfp_emit_fstmd (base_reg, 16);
9595 return saved;
9598 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9599 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9601 reg = gen_rtx_REG (DFmode, base_reg);
9602 base_reg += 2;
9604 XVECEXP (par, 0, 0)
9605 = gen_rtx_SET (VOIDmode,
9606 gen_frame_mem (BLKmode,
9607 gen_rtx_PRE_DEC (BLKmode,
9608 stack_pointer_rtx)),
9609 gen_rtx_UNSPEC (BLKmode,
9610 gen_rtvec (1, reg),
9611 UNSPEC_PUSH_MULT));
9613 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9614 plus_constant (stack_pointer_rtx, -(count * 8)));
9615 RTX_FRAME_RELATED_P (tmp) = 1;
9616 XVECEXP (dwarf, 0, 0) = tmp;
9618 tmp = gen_rtx_SET (VOIDmode,
9619 gen_frame_mem (DFmode, stack_pointer_rtx),
9620 reg);
9621 RTX_FRAME_RELATED_P (tmp) = 1;
9622 XVECEXP (dwarf, 0, 1) = tmp;
9624 for (i = 1; i < count; i++)
9626 reg = gen_rtx_REG (DFmode, base_reg);
9627 base_reg += 2;
9628 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9630 tmp = gen_rtx_SET (VOIDmode,
9631 gen_frame_mem (DFmode,
9632 plus_constant (stack_pointer_rtx,
9633 i * 8)),
9634 reg);
9635 RTX_FRAME_RELATED_P (tmp) = 1;
9636 XVECEXP (dwarf, 0, i + 1) = tmp;
9639 par = emit_insn (par);
9640 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9641 REG_NOTES (par));
9642 RTX_FRAME_RELATED_P (par) = 1;
9644 return count * 8;
9647 /* Emit a call instruction with pattern PAT. ADDR is the address of
9648 the call target. */
9650 void
9651 arm_emit_call_insn (rtx pat, rtx addr)
9653 rtx insn;
9655 insn = emit_call_insn (pat);
9657 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9658 If the call might use such an entry, add a use of the PIC register
9659 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9660 if (TARGET_VXWORKS_RTP
9661 && flag_pic
9662 && GET_CODE (addr) == SYMBOL_REF
9663 && (SYMBOL_REF_DECL (addr)
9664 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9665 : !SYMBOL_REF_LOCAL_P (addr)))
9667 require_pic_register ();
9668 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9672 /* Output a 'call' insn. */
9673 const char *
9674 output_call (rtx *operands)
9676 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9678 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9679 if (REGNO (operands[0]) == LR_REGNUM)
9681 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9682 output_asm_insn ("mov%?\t%0, %|lr", operands);
9685 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9687 if (TARGET_INTERWORK || arm_arch4t)
9688 output_asm_insn ("bx%?\t%0", operands);
9689 else
9690 output_asm_insn ("mov%?\t%|pc, %0", operands);
9692 return "";
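/* For example (illustrative), on an arm_arch4t target a call through
   r2 emits:

       mov lr, pc
       bx  r2

   while on pre-v4t, non-interworking targets the bx is replaced by
   "mov pc, r2".  */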
9695 /* Output a 'call' insn that is a reference in memory. */
9696 const char *
9697 output_call_mem (rtx *operands)
9699 if (TARGET_INTERWORK && !arm_arch5)
9701 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9702 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9703 output_asm_insn ("bx%?\t%|ip", operands);
9705 else if (regno_use_in (LR_REGNUM, operands[0]))
9707 /* LR is used in the memory address. We load the address in the
9708 first instruction. It's safe to use IP as the target of the
9709 load since the call will kill it anyway. */
9710 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9711 if (arm_arch5)
9712 output_asm_insn ("blx%?\t%|ip", operands);
9713 else
9715 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9716 if (arm_arch4t)
9717 output_asm_insn ("bx%?\t%|ip", operands);
9718 else
9719 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9722 else
9724 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9725 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9728 return "";
9732 /* Output a move from arm registers to an fpa register.
9733 OPERANDS[0] is an fpa register.
9734 OPERANDS[1] is the first register of an arm register pair. */
9735 const char *
9736 output_mov_long_double_fpa_from_arm (rtx *operands)
9738 int arm_reg0 = REGNO (operands[1]);
9739 rtx ops[3];
9741 gcc_assert (arm_reg0 != IP_REGNUM);
9743 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9744 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9745 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9747 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9748 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9750 return "";
9753 /* Output a move from an fpa register to arm registers.
9754 OPERANDS[0] is the first register of an arm register pair.
9755 OPERANDS[1] is an fpa register. */
9756 const char *
9757 output_mov_long_double_arm_from_fpa (rtx *operands)
9759 int arm_reg0 = REGNO (operands[0]);
9760 rtx ops[3];
9762 gcc_assert (arm_reg0 != IP_REGNUM);
9764 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9765 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9766 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9768 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9769 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9770 return "";
9773 /* Output a move from arm registers to arm registers of a long double.
9774 OPERANDS[0] is the destination.
9775 OPERANDS[1] is the source. */
9776 const char *
9777 output_mov_long_double_arm_from_arm (rtx *operands)
9779 /* We have to be careful here because the two might overlap. */
9780 int dest_start = REGNO (operands[0]);
9781 int src_start = REGNO (operands[1]);
9782 rtx ops[2];
9783 int i;
9785 if (dest_start < src_start)
9787 for (i = 0; i < 3; i++)
9789 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9790 ops[1] = gen_rtx_REG (SImode, src_start + i);
9791 output_asm_insn ("mov%?\t%0, %1", ops);
9794 else
9796 for (i = 2; i >= 0; i--)
9798 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9799 ops[1] = gen_rtx_REG (SImode, src_start + i);
9800 output_asm_insn ("mov%?\t%0, %1", ops);
9804 return "";
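/* Worked example (added note): moving {r1,r2,r3} to {r2,r3,r4} has
   dest_start > src_start, so the loop above copies downwards --
   mov r4, r3; mov r3, r2; mov r2, r1 -- and no source register is
   clobbered before it has been read.  */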
9808 /* Output a move from arm registers to an fpa register.
9809 OPERANDS[0] is an fpa register.
9810 OPERANDS[1] is the first register of an arm register pair. */
9811 const char *
9812 output_mov_double_fpa_from_arm (rtx *operands)
9814 int arm_reg0 = REGNO (operands[1]);
9815 rtx ops[2];
9817 gcc_assert (arm_reg0 != IP_REGNUM);
9819 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9820 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9821 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9822 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9823 return "";
9826 /* Output a move from an fpa register to arm registers.
9827 OPERANDS[0] is the first register of an arm register pair.
9828 OPERANDS[1] is an fpa register. */
9829 const char *
9830 output_mov_double_arm_from_fpa (rtx *operands)
9832 int arm_reg0 = REGNO (operands[0]);
9833 rtx ops[2];
9835 gcc_assert (arm_reg0 != IP_REGNUM);
9837 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9838 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9839 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9840 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9841 return "";
9844 /* Output a move between double words.
9845 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9846 or MEM<-REG and all MEMs must be offsettable addresses. */
9847 const char *
9848 output_move_double (rtx *operands)
9850 enum rtx_code code0 = GET_CODE (operands[0]);
9851 enum rtx_code code1 = GET_CODE (operands[1]);
9852 rtx otherops[3];
9854 if (code0 == REG)
9856 int reg0 = REGNO (operands[0]);
9858 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9860 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9862 switch (GET_CODE (XEXP (operands[1], 0)))
9864 case REG:
9865 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9866 break;
9868 case PRE_INC:
9869 gcc_assert (TARGET_LDRD);
9870 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9871 break;
9873 case PRE_DEC:
9874 if (TARGET_LDRD)
9875 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9876 else
9877 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9878 break;
9880 case POST_INC:
9881 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9882 break;
9884 case POST_DEC:
9885 gcc_assert (TARGET_LDRD);
9886 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9887 break;
9889 case PRE_MODIFY:
9890 case POST_MODIFY:
9891 otherops[0] = operands[0];
9892 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9893 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9895 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9897 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9899 /* Registers overlap so split out the increment. */
9900 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9901 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9903 else
9905 /* IWMMXT allows offsets larger than ldrd can handle;
9906 fix these up with a pair of ldr. */
9907 if (GET_CODE (otherops[2]) == CONST_INT
9908 && (INTVAL(otherops[2]) <= -256
9909 || INTVAL(otherops[2]) >= 256))
9911 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9912 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9913 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9915 else
9916 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9919 else
9921 /* IWMMXT allows offsets larger than ldrd can handle;
9922 fix these up with a pair of ldr. */
9923 if (GET_CODE (otherops[2]) == CONST_INT
9924 && (INTVAL(otherops[2]) <= -256
9925 || INTVAL(otherops[2]) >= 256))
9927 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9928 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9929 otherops[0] = operands[0];
9930 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9932 else
9933 /* We only allow constant increments, so this is safe. */
9934 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9936 break;
9938 case LABEL_REF:
9939 case CONST:
9940 output_asm_insn ("adr%?\t%0, %1", operands);
9941 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9942 break;
9944 /* ??? This needs checking for thumb2. */
9945 default:
9946 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9947 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9949 otherops[0] = operands[0];
9950 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9951 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9953 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9955 if (GET_CODE (otherops[2]) == CONST_INT)
9957 switch ((int) INTVAL (otherops[2]))
9959 case -8:
9960 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
9961 return "";
9962 case -4:
9963 if (TARGET_THUMB2)
9964 break;
9965 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
9966 return "";
9967 case 4:
9968 if (TARGET_THUMB2)
9969 break;
9970 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
9971 return "";
9974 if (TARGET_LDRD
9975 && (GET_CODE (otherops[2]) == REG
9976 || (GET_CODE (otherops[2]) == CONST_INT
9977 && INTVAL (otherops[2]) > -256
9978 && INTVAL (otherops[2]) < 256)))
9980 if (reg_overlap_mentioned_p (otherops[0],
9981 otherops[2]))
9983 /* Swap base and index registers over to
9984 avoid a conflict. */
9985 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
9986 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
9988 /* If both registers conflict, it will usually
9989 have been fixed by a splitter. */
9990 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9992 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9993 output_asm_insn ("ldr%(d%)\t%0, [%1]",
9994 otherops);
9996 else
9997 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
9998 return "";
10001 if (GET_CODE (otherops[2]) == CONST_INT)
10003 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10004 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10005 else
10006 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10008 else
10009 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10011 else
10012 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10014 return "ldm%(ia%)\t%0, %M0";
10016 else
10018 otherops[1] = adjust_address (operands[1], SImode, 4);
10019 /* Take care of overlapping base/data reg. */
10020 if (reg_mentioned_p (operands[0], operands[1]))
10022 output_asm_insn ("ldr%?\t%0, %1", otherops);
10023 output_asm_insn ("ldr%?\t%0, %1", operands);
10025 else
10027 output_asm_insn ("ldr%?\t%0, %1", operands);
10028 output_asm_insn ("ldr%?\t%0, %1", otherops);
10033 else
10035 /* Constraints should ensure this. */
10036 gcc_assert (code0 == MEM && code1 == REG);
10037 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10039 switch (GET_CODE (XEXP (operands[0], 0)))
10041 case REG:
10042 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10043 break;
10045 case PRE_INC:
10046 gcc_assert (TARGET_LDRD);
10047 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10048 break;
10050 case PRE_DEC:
10051 if (TARGET_LDRD)
10052 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10053 else
10054 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10055 break;
10057 case POST_INC:
10058 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10059 break;
10061 case POST_DEC:
10062 gcc_assert (TARGET_LDRD);
10063 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10064 break;
10066 case PRE_MODIFY:
10067 case POST_MODIFY:
10068 otherops[0] = operands[1];
10069 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10070 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10072 /* IWMMXT allows offsets larger than strd can handle,
10073 fix these up with a pair of str. */
10074 if (GET_CODE (otherops[2]) == CONST_INT
10075 && (INTVAL (otherops[2]) <= -256
10076 || INTVAL (otherops[2]) >= 256))
10078 rtx reg1;
10079 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10080 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10082 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
10083 otherops[0] = reg1;
10084 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10086 else
10088 otherops[0] = reg1;
10089 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10090 otherops[0] = operands[1];
10091 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
10094 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10095 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10096 else
10097 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10098 break;
10100 case PLUS:
10101 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10102 if (GET_CODE (otherops[2]) == CONST_INT)
10104 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10106 case -8:
10107 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10108 return "";
10110 case -4:
10111 if (TARGET_THUMB2)
10112 break;
10113 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10114 return "";
10116 case 4:
10117 if (TARGET_THUMB2)
10118 break;
10119 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10120 return "";
10123 if (TARGET_LDRD
10124 && (GET_CODE (otherops[2]) == REG
10125 || (GET_CODE (otherops[2]) == CONST_INT
10126 && INTVAL (otherops[2]) > -256
10127 && INTVAL (otherops[2]) < 256)))
10129 otherops[0] = operands[1];
10130 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10131 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10132 return "";
10134 /* Fall through */
10136 default:
10137 otherops[0] = adjust_address (operands[0], SImode, 4);
10138 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10139 output_asm_insn ("str%?\t%1, %0", operands);
10140 output_asm_insn ("str%?\t%1, %0", otherops);
10144 return "";
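/* Editor's sketch (standalone, not part of arm.c): the addressing-range
   rule used repeatedly above.  ldrd/strd immediate offsets must satisfy
   -256 < n < 256; anything outside that range is split into two
   single-word transfers.  The helper name is hypothetical.  */
#include <stdio.h>

static int ldrd_offset_ok (long n)
{
  return n > -256 && n < 256;
}

int main (void)
{
  long offs[] = { 0, 8, -8, 255, 256, -256, 1024 };
  unsigned i;
  for (i = 0; i < sizeof offs / sizeof *offs; i++)
    printf ("offset %5ld -> %s\n", offs[i],
            ldrd_offset_ok (offs[i]) ? "one ldrd/strd" : "two ldr/str");
  return 0;
}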
10147 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10148 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10150 const char *
10151 output_move_quad (rtx *operands)
10153 if (REG_P (operands[0]))
10155 /* Load, or reg->reg move. */
10157 if (MEM_P (operands[1]))
10159 switch (GET_CODE (XEXP (operands[1], 0)))
10161 case REG:
10162 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10163 break;
10165 case LABEL_REF:
10166 case CONST:
10167 output_asm_insn ("adr%?\t%0, %1", operands);
10168 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10169 break;
10171 default:
10172 gcc_unreachable ();
10175 else
10177 rtx ops[2];
10178 int dest, src, i;
10180 gcc_assert (REG_P (operands[1]));
10182 dest = REGNO (operands[0]);
10183 src = REGNO (operands[1]);
10185 /* This seems pretty dumb, but hopefully GCC won't try to do it
10186 very often. */
10187 if (dest < src)
10188 for (i = 0; i < 4; i++)
10190 ops[0] = gen_rtx_REG (SImode, dest + i);
10191 ops[1] = gen_rtx_REG (SImode, src + i);
10192 output_asm_insn ("mov%?\t%0, %1", ops);
10194 else
10195 for (i = 3; i >= 0; i--)
10197 ops[0] = gen_rtx_REG (SImode, dest + i);
10198 ops[1] = gen_rtx_REG (SImode, src + i);
10199 output_asm_insn ("mov%?\t%0, %1", ops);
10203 else
10205 gcc_assert (MEM_P (operands[0]));
10206 gcc_assert (REG_P (operands[1]));
10207 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10209 switch (GET_CODE (XEXP (operands[0], 0)))
10211 case REG:
10212 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10213 break;
10215 default:
10216 gcc_unreachable ();
10220 return "";
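/* Editor's sketch (standalone, not part of arm.c): why output_move_quad
   picks the copy direction from the register numbers.  Copying an
   overlapping register range low-to-high is only safe when the
   destination is below the source; otherwise copy high-to-low, exactly
   as memmove does.  Names are hypothetical.  */
#include <assert.h>

static void move_regs (int regs[], int dest, int src, int count)
{
  int i;
  if (dest < src)
    for (i = 0; i < count; i++)       /* ascending: safe when dest < src  */
      regs[dest + i] = regs[src + i];
  else
    for (i = count - 1; i >= 0; i--)  /* descending: safe when dest > src */
      regs[dest + i] = regs[src + i];
}

int main (void)
{
  int r[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  move_regs (r, 1, 0, 4);             /* overlapping move with dest > src */
  assert (r[1] == 0 && r[2] == 1 && r[3] == 2 && r[4] == 3);
  return 0;
}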
10223 /* Output a VFP load or store instruction. */
10225 const char *
10226 output_move_vfp (rtx *operands)
10228 rtx reg, mem, addr, ops[2];
10229 int load = REG_P (operands[0]);
10230 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10231 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10232 const char *template;
10233 char buff[50];
10234 enum machine_mode mode;
10236 reg = operands[!load];
10237 mem = operands[load];
10239 mode = GET_MODE (reg);
10241 gcc_assert (REG_P (reg));
10242 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10243 gcc_assert (mode == SFmode
10244 || mode == DFmode
10245 || mode == SImode
10246 || mode == DImode
10247 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10248 gcc_assert (MEM_P (mem));
10250 addr = XEXP (mem, 0);
10252 switch (GET_CODE (addr))
10254 case PRE_DEC:
10255 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10256 ops[0] = XEXP (addr, 0);
10257 ops[1] = reg;
10258 break;
10260 case POST_INC:
10261 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10262 ops[0] = XEXP (addr, 0);
10263 ops[1] = reg;
10264 break;
10266 default:
10267 template = "f%s%c%%?\t%%%s0, %%1%s";
10268 ops[0] = reg;
10269 ops[1] = mem;
10270 break;
10273 sprintf (buff, template,
10274 load ? "ld" : "st",
10275 dp ? 'd' : 's',
10276 dp ? "P" : "",
10277 integer_p ? "\t%@ int" : "");
10278 output_asm_insn (buff, ops);
10280 return "";
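/* Editor's sketch (standalone, not part of arm.c): the two-stage
   formatting used by output_move_vfp above.  sprintf first fills the
   load/store direction and the precision into a template, producing the
   final operand-pattern string that is then passed to the assembler
   output routine.  The simplified template here is illustrative.  */
#include <stdio.h>

int main (void)
{
  char buff[50];
  int load = 1, dp = 1;
  /* Mirrors the default case template "f%s%c%%?\t%%%s0, %%1%s".  */
  sprintf (buff, "f%s%c\t%%%s0, %%1",
           load ? "ld" : "st",   /* direction                        */
           dp ? 'd' : 's',       /* double or single precision       */
           dp ? "P" : "");       /* operand modifier for D registers */
  puts (buff);                   /* prints: fldd	%P0, %1 */
  return 0;
}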
10283 /* Output a Neon quad-word load or store, or a load or store for
10284 larger structure modes. We could also support post-modify forms using
10285 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10286 yet.
10287 WARNING: The ordering of elements in memory is weird in big-endian mode,
10288 because we use VSTM instead of VST1, to make it easy to make vector stores
10289 via ARM registers write values in the same order as stores direct from Neon
10290 registers. For example, the byte ordering of a quadword vector with 16-bit
10291 elements like this:
10293 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10295 will be (with lowest address first, h = most-significant byte,
10296 l = least-significant byte of element):
10298 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10299 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10301 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10302 rN in the order:
10304 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10306 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10307 layout will result as if VSTM/VLDM were used. */
10309 const char *
10310 output_move_neon (rtx *operands)
10312 rtx reg, mem, addr, ops[2];
10313 int regno, load = REG_P (operands[0]);
10314 const char *template;
10315 char buff[50];
10316 enum machine_mode mode;
10318 reg = operands[!load];
10319 mem = operands[load];
10321 mode = GET_MODE (reg);
10323 gcc_assert (REG_P (reg));
10324 regno = REGNO (reg);
10325 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10326 || NEON_REGNO_OK_FOR_QUAD (regno));
10327 gcc_assert (VALID_NEON_DREG_MODE (mode)
10328 || VALID_NEON_QREG_MODE (mode)
10329 || VALID_NEON_STRUCT_MODE (mode));
10330 gcc_assert (MEM_P (mem));
10332 addr = XEXP (mem, 0);
10334 /* Strip off const from addresses like (const (plus (...))). */
10335 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10336 addr = XEXP (addr, 0);
10338 switch (GET_CODE (addr))
10340 case POST_INC:
10341 template = "v%smia%%?\t%%0!, %%h1";
10342 ops[0] = XEXP (addr, 0);
10343 ops[1] = reg;
10344 break;
10346 case POST_MODIFY:
10347 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10348 gcc_unreachable ();
10350 case LABEL_REF:
10351 case PLUS:
10353 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10354 int i;
10355 int overlap = -1;
10356 for (i = 0; i < nregs; i++)
10358 /* We're only using DImode here because it's a convenient size. */
10359 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10360 ops[1] = adjust_address (mem, SImode, 8 * i);
10361 if (reg_overlap_mentioned_p (ops[0], mem))
10363 gcc_assert (overlap == -1);
10364 overlap = i;
10366 else
10368 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10369 output_asm_insn (buff, ops);
10372 if (overlap != -1)
10374 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10375 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10376 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10377 output_asm_insn (buff, ops);
10380 return "";
10383 default:
10384 template = "v%smia%%?\t%%m0, %%h1";
10385 ops[0] = mem;
10386 ops[1] = reg;
10389 sprintf (buff, template, load ? "ld" : "st");
10390 output_asm_insn (buff, ops);
10392 return "";
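/* Editor's sketch (standalone, not part of arm.c): the deferral trick in
   the PLUS/LABEL_REF case of output_move_neon above.  If one destination
   register is also the base register holding the address, loading it
   early would corrupt the address used for the remaining chunks, so that
   chunk is remembered and loaded last.  Names are hypothetical.  */
#include <assert.h>

static void load_regs (long regs[], int dst, int nregs, int base,
                       const long mem[])
{
  int i, overlap = -1;
  for (i = 0; i < nregs; i++)
    if (dst + i == base)
      overlap = i;                         /* defer the conflicting chunk */
    else
      regs[dst + i] = mem[regs[base] + i];
  if (overlap != -1)                       /* now safe: base no longer needed */
    regs[dst + overlap] = mem[regs[base] + overlap];
}

int main (void)
{
  long mem[8] = { 10, 11, 12, 13 }, regs[8] = { 0 };
  regs[2] = 1;                      /* r2 holds the address (index 1)    */
  load_regs (regs, 0, 4, 2, mem);   /* r0..r3 <- mem[1..4]; r2 overlaps  */
  assert (regs[0] == 11 && regs[1] == 12 && regs[2] == 13 && regs[3] == 0);
  return 0;
}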
10395 /* Output an ADD r, s, #n where n may be too big for one instruction.
10396 If adding zero and the destination is the source register, output nothing. */
10397 const char *
10398 output_add_immediate (rtx *operands)
10400 HOST_WIDE_INT n = INTVAL (operands[2]);
10402 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10404 if (n < 0)
10405 output_multi_immediate (operands,
10406 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10407 -n);
10408 else
10409 output_multi_immediate (operands,
10410 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10414 return "";
10417 /* Output a multiple immediate operation.
10418 OPERANDS is the vector of operands referred to in the output patterns.
10419 INSTR1 is the output pattern to use for the first constant.
10420 INSTR2 is the output pattern to use for subsequent constants.
10421 IMMED_OP is the index of the constant slot in OPERANDS.
10422 N is the constant value. */
10423 static const char *
10424 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10425 int immed_op, HOST_WIDE_INT n)
10427 #if HOST_BITS_PER_WIDE_INT > 32
10428 n &= 0xffffffff;
10429 #endif
10431 if (n == 0)
10433 /* Quick and easy output. */
10434 operands[immed_op] = const0_rtx;
10435 output_asm_insn (instr1, operands);
10437 else
10439 int i;
10440 const char * instr = instr1;
10442 /* Note that n is never zero here (which would give no output). */
10443 for (i = 0; i < 32; i += 2)
10445 if (n & (3 << i))
10447 operands[immed_op] = GEN_INT (n & (255 << i));
10448 output_asm_insn (instr, operands);
10449 instr = instr2;
10450 i += 6;
10455 return "";
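/* Editor's sketch (standalone, not part of arm.c): the chunking loop of
   output_multi_immediate above.  ARM data-processing immediates are an
   8-bit value rotated by an even amount, so a 32-bit constant is peeled
   into 8-bit fields anchored at even bit positions, one add/sub per
   field.  This only prints the fields.  */
#include <stdio.h>

int main (void)
{
  unsigned long n = 0x12345678, i;
  for (i = 0; i < 32; i += 2)
    if (n & (3UL << i))
      {
        printf ("chunk: %#010lx\n", n & (255UL << i));
        i += 6;   /* the 8-bit field consumed bits i..i+7; skip past it */
      }
  return 0;
}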
10458 /* Return the name of a shifter operation. */
10459 static const char *
10460 arm_shift_nmem (enum rtx_code code)
10462 switch (code)
10464 case ASHIFT:
10465 return ARM_LSL_NAME;
10467 case ASHIFTRT:
10468 return "asr";
10470 case LSHIFTRT:
10471 return "lsr";
10473 case ROTATERT:
10474 return "ror";
10476 default:
10477 abort ();
10481 /* Return the appropriate ARM instruction for the operation code.
10482 The returned result should not be overwritten. OP is the rtx of the
10483 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10484 was shifted. */
10485 const char *
10486 arithmetic_instr (rtx op, int shift_first_arg)
10488 switch (GET_CODE (op))
10490 case PLUS:
10491 return "add";
10493 case MINUS:
10494 return shift_first_arg ? "rsb" : "sub";
10496 case IOR:
10497 return "orr";
10499 case XOR:
10500 return "eor";
10502 case AND:
10503 return "and";
10505 case ASHIFT:
10506 case ASHIFTRT:
10507 case LSHIFTRT:
10508 case ROTATERT:
10509 return arm_shift_nmem (GET_CODE (op));
10511 default:
10512 gcc_unreachable ();
10516 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10517 for the operation code. The returned result should not be overwritten.
10518 OP is the rtx code of the shift.
10519 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
10520 constant shift amount if the shift is by a constant. */
10521 static const char *
10522 shift_op (rtx op, HOST_WIDE_INT *amountp)
10524 const char * mnem;
10525 enum rtx_code code = GET_CODE (op);
10527 switch (GET_CODE (XEXP (op, 1)))
10529 case REG:
10530 case SUBREG:
10531 *amountp = -1;
10532 break;
10534 case CONST_INT:
10535 *amountp = INTVAL (XEXP (op, 1));
10536 break;
10538 default:
10539 gcc_unreachable ();
10542 switch (code)
10544 case ROTATE:
10545 gcc_assert (*amountp != -1);
10546 *amountp = 32 - *amountp;
10547 code = ROTATERT;
10549 /* Fall through. */
10551 case ASHIFT:
10552 case ASHIFTRT:
10553 case LSHIFTRT:
10554 case ROTATERT:
10555 mnem = arm_shift_nmem (code);
10556 break;
10558 case MULT:
10559 /* We never have to worry about the amount being other than a
10560 power of 2, since this case can never be reloaded from a reg. */
10561 gcc_assert (*amountp != -1);
10562 *amountp = int_log2 (*amountp);
10563 return ARM_LSL_NAME;
10565 default:
10566 gcc_unreachable ();
10569 if (*amountp != -1)
10571 /* This is not 100% correct, but follows from the desire to merge
10572 multiplication by a power of 2 with the recognizer for a
10573 shift. >=32 is not a valid shift for "lsl", so we must try and
10574 output a shift that produces the correct arithmetical result.
10575 Using lsr #32 is identical except for the fact that the carry bit
10576 is not set correctly if we set the flags; but we never use the
10577 carry bit from such an operation, so we can ignore that. */
10578 if (code == ROTATERT)
10579 /* Rotate is just modulo 32. */
10580 *amountp &= 31;
10581 else if (*amountp != (*amountp & 31))
10583 if (code == ASHIFT)
10584 mnem = "lsr";
10585 *amountp = 32;
10588 /* Shifts of 0 are no-ops. */
10589 if (*amountp == 0)
10590 return NULL;
10593 return mnem;
10596 /* Obtain the shift from the POWER of two. */
10598 static HOST_WIDE_INT
10599 int_log2 (HOST_WIDE_INT power)
10601 HOST_WIDE_INT shift = 0;
10603 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10605 gcc_assert (shift <= 31);
10606 shift++;
10609 return shift;
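/* Editor's sketch (standalone, not part of arm.c): recovering a shift
   count from a power of two, as int_log2 does for the MULT case in
   shift_op above; multiplication by 2**k is emitted as "lsl #k".  The
   assert mirrors the gcc_assert and requires a power-of-two input.  */
#include <assert.h>

static int log2_exact (unsigned long power)
{
  int shift = 0;
  while (((1UL << shift) & power) == 0)
    {
      assert (shift <= 31);   /* POWER must have a bit set in the low 32 */
      shift++;
    }
  return shift;
}

int main (void)
{
  assert (log2_exact (1) == 0);
  assert (log2_exact (8) == 3);   /* mul rX, rY, #8  ->  lsl rX, rY, #3 */
  assert (log2_exact (1UL << 31) == 31);
  return 0;
}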
10612 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10613 because /bin/as is horribly restrictive. The judgement about
10614 whether or not each character is 'printable' (and can be output as
10615 is) or not (and must be printed with an octal escape) must be made
10616 with reference to the *host* character set -- the situation is
10617 similar to that discussed in the comments above pp_c_char in
10618 c-pretty-print.c. */
10620 #define MAX_ASCII_LEN 51
10622 void
10623 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10625 int i;
10626 int len_so_far = 0;
10628 fputs ("\t.ascii\t\"", stream);
10630 for (i = 0; i < len; i++)
10632 int c = p[i];
10634 if (len_so_far >= MAX_ASCII_LEN)
10636 fputs ("\"\n\t.ascii\t\"", stream);
10637 len_so_far = 0;
10640 if (ISPRINT (c))
10642 if (c == '\\' || c == '\"')
10644 putc ('\\', stream);
10645 len_so_far++;
10647 putc (c, stream);
10648 len_so_far++;
10650 else
10652 fprintf (stream, "\\%03o", c);
10653 len_so_far += 4;
10657 fputs ("\"\n", stream);
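/* Editor's sketch (standalone, not part of arm.c): the escaping rules of
   output_ascii_pseudo_op above, applied to one short string.  Printable
   characters pass through (with backslash and double quote escaped);
   everything else becomes a three-digit octal escape.  The line-length
   splitting at MAX_ASCII_LEN is omitted here, and isprint stands in for
   GCC's host-side ISPRINT.  */
#include <ctype.h>
#include <stdio.h>

int main (void)
{
  const unsigned char s[] = "say \"hi\"\n";
  unsigned i;
  fputs ("\t.ascii\t\"", stdout);
  for (i = 0; s[i] != 0; i++)
    if (isprint (s[i]))
      {
        if (s[i] == '\\' || s[i] == '\"')
          putchar ('\\');
        putchar (s[i]);
      }
    else
      printf ("\\%03o", s[i]);
  fputs ("\"\n", stdout);   /* emits: .ascii "say \"hi\"\012" */
  return 0;
}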
10660 /* Compute the register save mask for registers 0 through 12
10661 inclusive. This code is used by arm_compute_save_reg_mask. */
10663 static unsigned long
10664 arm_compute_save_reg0_reg12_mask (void)
10666 unsigned long func_type = arm_current_func_type ();
10667 unsigned long save_reg_mask = 0;
10668 unsigned int reg;
10670 if (IS_INTERRUPT (func_type))
10672 unsigned int max_reg;
10673 /* Interrupt functions must not corrupt any registers,
10674 even call-clobbered ones. If this is a leaf function
10675 we can just examine the registers used by the RTL, but
10676 otherwise we have to assume that whatever function is
10677 called might clobber anything, and so we have to save
10678 all the call-clobbered registers as well. */
10679 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10680 /* FIQ handlers have registers r8 - r12 banked, so
10681 we only need to check r0 - r7. Normal ISRs only
10682 bank r14 and r15, so we must check up to r12.
10683 r13 is the stack pointer which is always preserved,
10684 so we do not need to consider it here. */
10685 max_reg = 7;
10686 else
10687 max_reg = 12;
10689 for (reg = 0; reg <= max_reg; reg++)
10690 if (df_regs_ever_live_p (reg)
10691 || (! current_function_is_leaf && call_used_regs[reg]))
10692 save_reg_mask |= (1 << reg);
10694 /* Also save the pic base register if necessary. */
10695 if (flag_pic
10696 && !TARGET_SINGLE_PIC_BASE
10697 && arm_pic_register != INVALID_REGNUM
10698 && current_function_uses_pic_offset_table)
10699 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10701 else
10703 /* In arm mode we handle r11 (FP) as a special case. */
10704 unsigned last_reg = TARGET_ARM ? 10 : 11;
10706 /* In the normal case we only need to save those registers
10707 which are call saved and which are used by this function. */
10708 for (reg = 0; reg <= last_reg; reg++)
10709 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10710 save_reg_mask |= (1 << reg);
10712 /* Handle the frame pointer as a special case. */
10713 if (! TARGET_APCS_FRAME
10714 && ! frame_pointer_needed
10715 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10716 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10717 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10724 /* If we aren't loading the PIC register,
10725 don't stack it even though it may be live. */
10726 if (flag_pic
10727 && !TARGET_SINGLE_PIC_BASE
10728 && arm_pic_register != INVALID_REGNUM
10729 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10730 || current_function_uses_pic_offset_table))
10731 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10733 /* The prologue will copy SP into R0, so save it. */
10734 if (IS_STACKALIGN (func_type))
10735 save_reg_mask |= 1;
10738 /* Save registers so the exception handler can modify them. */
10739 if (current_function_calls_eh_return)
10741 unsigned int i;
10743 for (i = 0; ; i++)
10745 reg = EH_RETURN_DATA_REGNO (i);
10746 if (reg == INVALID_REGNUM)
10747 break;
10748 save_reg_mask |= 1 << reg;
10752 return save_reg_mask;
10756 /* Compute a bit mask of which registers need to be
10757 saved on the stack for the current function. */
10759 static unsigned long
10760 arm_compute_save_reg_mask (void)
10762 unsigned int save_reg_mask = 0;
10763 unsigned long func_type = arm_current_func_type ();
10764 unsigned int reg;
10766 if (IS_NAKED (func_type))
10767 /* This should never really happen. */
10768 return 0;
10770 /* If we are creating a stack frame, then we must save the frame pointer,
10771 IP (which will hold the old stack pointer), LR and the PC. */
10772 if (frame_pointer_needed && TARGET_ARM)
10773 save_reg_mask |=
10774 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10775 | (1 << IP_REGNUM)
10776 | (1 << LR_REGNUM)
10777 | (1 << PC_REGNUM);
10779 /* Volatile functions do not return, so there
10780 is no need to save any other registers. */
10781 if (IS_VOLATILE (func_type))
10782 return save_reg_mask;
10784 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10786 /* Decide if we need to save the link register.
10787 Interrupt routines have their own banked link register,
10788 so they never need to save it.
10789 Otherwise if we do not use the link register we do not need to save
10790 it. If we are pushing other registers onto the stack however, we
10791 can save an instruction in the epilogue by pushing the link register
10792 now and then popping it back into the PC. This incurs extra memory
10793 accesses though, so we only do it when optimizing for size, and only
10794 if we know that we will not need a fancy return sequence. */
10795 if (df_regs_ever_live_p (LR_REGNUM)
10796 || (save_reg_mask
10797 && optimize_size
10798 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10799 && !current_function_calls_eh_return))
10800 save_reg_mask |= 1 << LR_REGNUM;
10802 if (cfun->machine->lr_save_eliminated)
10803 save_reg_mask &= ~ (1 << LR_REGNUM);
10805 if (TARGET_REALLY_IWMMXT
10806 && ((bit_count (save_reg_mask)
10807 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10809 /* The total number of registers that are going to be pushed
10810 onto the stack is odd. We need to ensure that the stack
10811 is 64-bit aligned before we start to save iWMMXt registers,
10812 and also before we start to create locals. (A local variable
10813 might be a double or long long which we will load/store using
10814 an iWMMXt instruction). Therefore we need to push another
10815 ARM register, so that the stack will be 64-bit aligned. We
10816 try to avoid using the arg registers (r0 - r3) as they might be
10817 used to pass values in a tail call. */
10818 for (reg = 4; reg <= 12; reg++)
10819 if ((save_reg_mask & (1 << reg)) == 0)
10820 break;
10822 if (reg <= 12)
10823 save_reg_mask |= (1 << reg);
10824 else
10826 cfun->machine->sibcall_blocked = 1;
10827 save_reg_mask |= (1 << 3);
10831 /* We may need to push an additional register for use initializing the
10832 PIC base register. */
10833 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10834 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10836 reg = thumb_find_work_register (1 << 4);
10837 if (!call_used_regs[reg])
10838 save_reg_mask |= (1 << reg);
10841 return save_reg_mask;
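/* Editor's sketch (standalone, not part of arm.c): the stack-parity fix
   above.  Each pushed core register is 4 bytes, so an odd register count
   leaves SP only word aligned; pushing one extra register chosen from
   r4-r12 restores the 64-bit alignment that iWMMXt loads and stores
   need.  Helper name is hypothetical.  */
#include <stdio.h>

static int popcount32 (unsigned long m)
{
  int n = 0;
  for (; m; m &= m - 1)
    n++;
  return n;
}

int main (void)
{
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 14); /* r4,r5,lr */
  if (popcount32 (mask) % 2 != 0)
    {
      int reg;
      for (reg = 4; reg <= 12; reg++)      /* avoid arg registers r0-r3 */
        if ((mask & (1UL << reg)) == 0)
          break;
      if (reg <= 12)
        mask |= 1UL << reg;                /* pad with one more register */
    }
  printf ("mask = %#lx (%d regs)\n", mask, popcount32 (mask));
  return 0;
}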
10845 /* Compute a bit mask of which registers need to be
10846 saved on the stack for the current function. */
10847 static unsigned long
10848 thumb1_compute_save_reg_mask (void)
10850 unsigned long mask;
10851 unsigned reg;
10853 mask = 0;
10854 for (reg = 0; reg < 12; reg ++)
10855 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10856 mask |= 1 << reg;
10858 if (flag_pic
10859 && !TARGET_SINGLE_PIC_BASE
10860 && arm_pic_register != INVALID_REGNUM
10861 && current_function_uses_pic_offset_table)
10862 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10864 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10865 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10866 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10868 /* LR will also be pushed if any lo regs are pushed. */
10869 if (mask & 0xff || thumb_force_lr_save ())
10870 mask |= (1 << LR_REGNUM);
10872 /* Make sure we have a low work register if we need one.
10873 We will need one if we are going to push a high register,
10874 but we are not currently intending to push a low register. */
10875 if ((mask & 0xff) == 0
10876 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10878 /* Use thumb_find_work_register to choose which register
10879 we will use. If the register is live then we will
10880 have to push it. Use LAST_LO_REGNUM as our fallback
10881 choice for the register to select. */
10882 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10883 /* Make sure the register returned by thumb_find_work_register is
10884 not part of the return value. */
10885 if (reg * UNITS_PER_WORD <= arm_size_return_regs ())
10886 reg = LAST_LO_REGNUM;
10888 if (! call_used_regs[reg])
10889 mask |= 1 << reg;
10892 return mask;
10896 /* Return the number of bytes required to save VFP registers. */
10897 static int
10898 arm_get_vfp_saved_size (void)
10900 unsigned int regno;
10901 int count;
10902 int saved;
10904 saved = 0;
10905 /* Space for saved VFP registers. */
10906 if (TARGET_HARD_FLOAT && TARGET_VFP)
10908 count = 0;
10909 for (regno = FIRST_VFP_REGNUM;
10910 regno < LAST_VFP_REGNUM;
10911 regno += 2)
10913 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10914 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10916 if (count > 0)
10918 /* Workaround ARM10 VFPr1 bug. */
10919 if (count == 2 && !arm_arch6)
10920 count++;
10921 saved += count * 8;
10923 count = 0;
10925 else
10926 count++;
10928 if (count > 0)
10930 if (count == 2 && !arm_arch6)
10931 count++;
10932 saved += count * 8;
10935 return saved;
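/* Editor's sketch (standalone, not part of arm.c): the run-length scan in
   arm_get_vfp_saved_size above.  Live D registers are counted in
   contiguous runs (one fstmd/fldmd per run, 8 bytes per register), and a
   run of exactly two registers is padded to three to step around the
   ARM10 VFPr1 erratum on pre-v6 cores.  The live[] table is made up.  */
#include <stdio.h>

int main (void)
{
  int live[16] = { 1, 1, 0, 0, 1, 1, 1, 0 };  /* d0-d1 and d4-d6 live */
  int arm_arch6 = 0, i, count = 0, saved = 0;
  for (i = 0; i <= 16; i++)
    if (i == 16 || !live[i])          /* end of a run (or of the bank) */
      {
        if (count > 0)
          {
            if (count == 2 && !arm_arch6)
              count++;                /* ARM10 VFPr1 workaround */
            saved += count * 8;
          }
        count = 0;
      }
    else
      count++;
  printf ("%d bytes of VFP save area\n", saved);  /* 24 + 24 = 48 */
  return 0;
}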
10939 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10940 everything bar the final return instruction. */
10941 const char *
10942 output_return_instruction (rtx operand, int really_return, int reverse)
10944 char conditional[10];
10945 char instr[100];
10946 unsigned reg;
10947 unsigned long live_regs_mask;
10948 unsigned long func_type;
10949 arm_stack_offsets *offsets;
10951 func_type = arm_current_func_type ();
10953 if (IS_NAKED (func_type))
10954 return "";
10956 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10958 /* If this function was declared non-returning, and we have
10959 found a tail call, then we have to trust that the called
10960 function won't return. */
10961 if (really_return)
10963 rtx ops[2];
10965 /* Otherwise, trap an attempted return by aborting. */
10966 ops[0] = operand;
10967 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
10968 : "abort");
10969 assemble_external_libcall (ops[1]);
10970 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
10973 return "";
10976 gcc_assert (!current_function_calls_alloca || really_return);
10978 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
10980 return_used_this_function = 1;
10982 live_regs_mask = arm_compute_save_reg_mask ();
10984 if (live_regs_mask)
10986 const char * return_reg;
10988 /* If we do not have any special requirements for function exit
10989 (e.g. interworking) then we can load the return address
10990 directly into the PC. Otherwise we must load it into LR. */
10991 if (really_return
10992 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
10993 return_reg = reg_names[PC_REGNUM];
10994 else
10995 return_reg = reg_names[LR_REGNUM];
10997 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
10999 /* There are three possible reasons for the IP register
11000 being saved. 1) a stack frame was created, in which case
11001 IP contains the old stack pointer, or 2) an ISR routine
11002 corrupted it, or 3) it was saved to align the stack on
11003 iWMMXt. In case 1, restore IP into SP, otherwise just
11004 restore IP. */
11005 if (frame_pointer_needed)
11007 live_regs_mask &= ~ (1 << IP_REGNUM);
11008 live_regs_mask |= (1 << SP_REGNUM);
11010 else
11011 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11014 /* On some ARM architectures it is faster to use LDR rather than
11015 LDM to load a single register. On other architectures, the
11016 cost is the same. In 26 bit mode, or for exception handlers,
11017 we have to use LDM to load the PC so that the CPSR is also
11018 restored. */
11019 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11020 if (live_regs_mask == (1U << reg))
11021 break;
11023 if (reg <= LAST_ARM_REGNUM
11024 && (reg != LR_REGNUM
11025 || ! really_return
11026 || ! IS_INTERRUPT (func_type)))
11028 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11029 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11031 else
11033 char *p;
11034 int first = 1;
11036 /* Generate the load multiple instruction to restore the
11037 registers. Note we can get here, even if
11038 frame_pointer_needed is true, but only if sp already
11039 points to the base of the saved core registers. */
11040 if (live_regs_mask & (1 << SP_REGNUM))
11042 unsigned HOST_WIDE_INT stack_adjust;
11044 offsets = arm_get_frame_offsets ();
11045 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11046 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11048 if (stack_adjust && arm_arch5 && TARGET_ARM)
11049 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11050 else
11052 /* If we can't use ldmib (SA110 bug),
11053 then try to pop r3 instead. */
11054 if (stack_adjust)
11055 live_regs_mask |= 1 << 3;
11056 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11059 else
11060 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11062 p = instr + strlen (instr);
11064 for (reg = 0; reg <= SP_REGNUM; reg++)
11065 if (live_regs_mask & (1 << reg))
11067 int l = strlen (reg_names[reg]);
11069 if (first)
11070 first = 0;
11071 else
11073 memcpy (p, ", ", 2);
11074 p += 2;
11077 memcpy (p, "%|", 2);
11078 memcpy (p + 2, reg_names[reg], l);
11079 p += l + 2;
11082 if (live_regs_mask & (1 << LR_REGNUM))
11084 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11085 /* If returning from an interrupt, restore the CPSR. */
11086 if (IS_INTERRUPT (func_type))
11087 strcat (p, "^");
11089 else
11090 strcpy (p, "}");
11093 output_asm_insn (instr, & operand);
11095 /* See if we need to generate an extra instruction to
11096 perform the actual function return. */
11097 if (really_return
11098 && func_type != ARM_FT_INTERWORKED
11099 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11101 /* The return has already been handled
11102 by loading the LR into the PC. */
11103 really_return = 0;
11107 if (really_return)
11109 switch ((int) ARM_FUNC_TYPE (func_type))
11111 case ARM_FT_ISR:
11112 case ARM_FT_FIQ:
11113 /* ??? This is wrong for unified assembly syntax. */
11114 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11115 break;
11117 case ARM_FT_INTERWORKED:
11118 sprintf (instr, "bx%s\t%%|lr", conditional);
11119 break;
11121 case ARM_FT_EXCEPTION:
11122 /* ??? This is wrong for unified assembly syntax. */
11123 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11124 break;
11126 default:
11127 /* Use bx if it's available. */
11128 if (arm_arch5 || arm_arch4t)
11129 sprintf (instr, "bx%s\t%%|lr", conditional);
11130 else
11131 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11132 break;
11135 output_asm_insn (instr, & operand);
11138 return "";
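/* Editor's sketch (standalone, not part of arm.c): assembling a
   register-list string the way output_return_instruction does, by
   appending each named register from the save mask to an "ldmfd"
   template.  The mask value is made up for the demonstration.  */
#include <stdio.h>
#include <string.h>

int main (void)
{
  static const char *const names[16] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "sl", "fp", "ip", "sp", "lr", "pc" };
  unsigned long mask = (1UL << 4) | (1UL << 5) | (1UL << 15); /* r4,r5,pc */
  char instr[100];
  int reg, first = 1;
  strcpy (instr, "ldmfd\tsp!, {");
  for (reg = 0; reg < 16; reg++)
    if (mask & (1UL << reg))
      {
        if (!first)
          strcat (instr, ", ");
        strcat (instr, names[reg]);
        first = 0;
      }
  strcat (instr, "}");
  puts (instr);   /* prints: ldmfd sp!, {r4, r5, pc} */
  return 0;
}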
11141 /* Write the function name into the code section, directly preceding
11142 the function prologue.
11144 Code will be output similar to this:
11146 .ascii "arm_poke_function_name", 0
11147 .align
11149 .word 0xff000000 + (t1 - t0)
11150 arm_poke_function_name
11151 mov ip, sp
11152 stmfd sp!, {fp, ip, lr, pc}
11153 sub fp, ip, #4
11155 When performing a stack backtrace, code can inspect the value
11156 of 'pc' stored at 'fp' + 0. If the trace function then looks
11157 at location pc - 12 and the top 8 bits are set, then we know
11158 that there is a function name embedded immediately preceding this
11159 location, whose length is ((pc[-3]) & 0x00ffffff).
11161 We assume that pc is declared as a pointer to an unsigned long.
11163 It is of no benefit to output the function name if we are assembling
11164 a leaf function. These function types will not contain a stack
11165 backtrace structure, so it is not possible to determine the
11166 function name. */
11167 void
11168 arm_poke_function_name (FILE *stream, const char *name)
11170 unsigned long alignlength;
11171 unsigned long length;
11172 rtx x;
11174 length = strlen (name) + 1;
11175 alignlength = ROUND_UP_WORD (length);
11177 ASM_OUTPUT_ASCII (stream, name, length);
11178 ASM_OUTPUT_ALIGN (stream, 2);
11179 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11180 assemble_aligned_integer (UNITS_PER_WORD, x);
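/* Editor's sketch (standalone, not part of arm.c): encoding and decoding
   the marker word written by arm_poke_function_name above.  The top byte
   is the 0xff tag a backtracer looks for at pc - 12; the low 24 bits
   give the padded length of the name that precedes it.  */
#include <assert.h>

int main (void)
{
  unsigned long length = 24;                        /* word-rounded strlen+1 */
  unsigned long marker = 0xff000000UL + length;     /* the assembled word    */
  assert ((marker & 0xff000000UL) == 0xff000000UL); /* tag present           */
  assert ((marker & 0x00ffffffUL) == length);       /* recover the length    */
  return 0;
}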
11183 /* Place some comments into the assembler stream
11184 describing the current function. */
11185 static void
11186 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11188 unsigned long func_type;
11190 if (TARGET_THUMB1)
11192 thumb1_output_function_prologue (f, frame_size);
11193 return;
11196 /* Sanity check. */
11197 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11199 func_type = arm_current_func_type ();
11201 switch ((int) ARM_FUNC_TYPE (func_type))
11203 default:
11204 case ARM_FT_NORMAL:
11205 break;
11206 case ARM_FT_INTERWORKED:
11207 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11208 break;
11209 case ARM_FT_ISR:
11210 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11211 break;
11212 case ARM_FT_FIQ:
11213 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11214 break;
11215 case ARM_FT_EXCEPTION:
11216 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11217 break;
11220 if (IS_NAKED (func_type))
11221 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11223 if (IS_VOLATILE (func_type))
11224 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11226 if (IS_NESTED (func_type))
11227 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11228 if (IS_STACKALIGN (func_type))
11229 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11231 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11232 current_function_args_size,
11233 current_function_pretend_args_size, frame_size);
11235 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11236 frame_pointer_needed,
11237 cfun->machine->uses_anonymous_args);
11239 if (cfun->machine->lr_save_eliminated)
11240 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11242 if (current_function_calls_eh_return)
11243 asm_fprintf (f, "\t%@ Calls __builtin_eh_return.\n");
11245 return_used_this_function = 0;
11248 const char *
11249 arm_output_epilogue (rtx sibling)
11251 int reg;
11252 unsigned long saved_regs_mask;
11253 unsigned long func_type;
11254 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11255 frame that is $fp + 4 for a non-variadic function. */
11256 int floats_offset = 0;
11257 rtx operands[3];
11258 FILE * f = asm_out_file;
11259 unsigned int lrm_count = 0;
11260 int really_return = (sibling == NULL);
11261 int start_reg;
11262 arm_stack_offsets *offsets;
11264 /* If we have already generated the return instruction
11265 then it is futile to generate anything else. */
11266 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11267 return "";
11269 func_type = arm_current_func_type ();
11271 if (IS_NAKED (func_type))
11272 /* Naked functions don't have epilogues. */
11273 return "";
11275 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11277 rtx op;
11279 /* A volatile function should never return. Call abort. */
11280 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11281 assemble_external_libcall (op);
11282 output_asm_insn ("bl\t%a0", &op);
11284 return "";
11287 /* If we are throwing an exception, then we really must be doing a
11288 return, so we can't tail-call. */
11289 gcc_assert (!current_function_calls_eh_return || really_return);
11291 offsets = arm_get_frame_offsets ();
11292 saved_regs_mask = arm_compute_save_reg_mask ();
11294 if (TARGET_IWMMXT)
11295 lrm_count = bit_count (saved_regs_mask);
11297 floats_offset = offsets->saved_args;
11298 /* Compute how far away the floats will be. */
11299 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11300 if (saved_regs_mask & (1 << reg))
11301 floats_offset += 4;
11303 if (frame_pointer_needed && TARGET_ARM)
11305 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11306 int vfp_offset = offsets->frame;
11308 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11310 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11311 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11313 floats_offset += 12;
11314 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11315 reg, FP_REGNUM, floats_offset - vfp_offset);
11318 else
11320 start_reg = LAST_FPA_REGNUM;
11322 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11324 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11326 floats_offset += 12;
11328 /* We can't unstack more than four registers at once. */
11329 if (start_reg - reg == 3)
11331 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11332 reg, FP_REGNUM, floats_offset - vfp_offset);
11333 start_reg = reg - 1;
11336 else
11338 if (reg != start_reg)
11339 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11340 reg + 1, start_reg - reg,
11341 FP_REGNUM, floats_offset - vfp_offset);
11342 start_reg = reg - 1;
11346 /* Just in case the last register checked also needs unstacking. */
11347 if (reg != start_reg)
11348 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11349 reg + 1, start_reg - reg,
11350 FP_REGNUM, floats_offset - vfp_offset);
11353 if (TARGET_HARD_FLOAT && TARGET_VFP)
11355 int saved_size;
11357 /* The fldmd insns do not have base+offset addressing
11358 modes, so we use IP to hold the address. */
11359 saved_size = arm_get_vfp_saved_size ();
11361 if (saved_size > 0)
11363 floats_offset += saved_size;
11364 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11365 FP_REGNUM, floats_offset - vfp_offset);
11367 start_reg = FIRST_VFP_REGNUM;
11368 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11370 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11371 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11373 if (start_reg != reg)
11374 vfp_output_fldmd (f, IP_REGNUM,
11375 (start_reg - FIRST_VFP_REGNUM) / 2,
11376 (reg - start_reg) / 2);
11377 start_reg = reg + 2;
11380 if (start_reg != reg)
11381 vfp_output_fldmd (f, IP_REGNUM,
11382 (start_reg - FIRST_VFP_REGNUM) / 2,
11383 (reg - start_reg) / 2);
11386 if (TARGET_IWMMXT)
11388 /* The frame pointer is guaranteed to be non-double-word aligned.
11389 This is because it is set to (old_stack_pointer - 4) and the
11390 old_stack_pointer was double word aligned. Thus the offset to
11391 the iWMMXt registers to be loaded must also be non-double-word
11392 sized, so that the resultant address *is* double-word aligned.
11393 We can ignore floats_offset since that was already included in
11394 the live_regs_mask. */
11395 lrm_count += (lrm_count % 2 ? 2 : 1);
11397 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11398 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11400 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11401 reg, FP_REGNUM, lrm_count * 4);
11402 lrm_count += 2;
11406 /* saved_regs_mask should contain the IP, which at the time of stack
11407 frame generation actually contains the old stack pointer. So a
11408 quick way to unwind the stack is just pop the IP register directly
11409 into the stack pointer. */
11410 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11411 saved_regs_mask &= ~ (1 << IP_REGNUM);
11412 saved_regs_mask |= (1 << SP_REGNUM);
11414 /* There are two registers left in saved_regs_mask - LR and PC. We
11415 only need to restore the LR register (the return address), but to
11416 save time we can load it directly into the PC, unless we need a
11417 special function exit sequence, or we are not really returning. */
11418 if (really_return
11419 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11420 && !current_function_calls_eh_return)
11421 /* Delete the LR from the register mask, so that the LR on
11422 the stack is loaded into the PC in the register mask. */
11423 saved_regs_mask &= ~ (1 << LR_REGNUM);
11424 else
11425 saved_regs_mask &= ~ (1 << PC_REGNUM);
11427 /* We must use SP as the base register, because SP is one of the
11428 registers being restored. If an interrupt or page fault
11429 happens in the ldm instruction, the SP might or might not
11430 have been restored. That would be bad, as then SP will no
11431 longer indicate the safe area of stack, and we can get stack
11432 corruption. Using SP as the base register means that it will
11433 be reset correctly to the original value, should an interrupt
11434 occur. If the stack pointer already points at the right
11435 place, then omit the subtraction. */
11436 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11437 || current_function_calls_alloca)
11438 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11439 4 * bit_count (saved_regs_mask));
11440 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11442 if (IS_INTERRUPT (func_type))
11443 /* Interrupt handlers will have pushed the
11444 IP onto the stack, so restore it now. */
11445 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11447 else
11449 HOST_WIDE_INT amount;
11450 int rfe;
11451 /* Restore stack pointer if necessary. */
11452 if (frame_pointer_needed)
11454 /* For Thumb-2 restore sp from the frame pointer.
11455 Operand restrictions mean we have to increment FP, then copy
11456 to SP. */
11457 amount = offsets->locals_base - offsets->saved_regs;
11458 operands[0] = hard_frame_pointer_rtx;
11460 else
11462 operands[0] = stack_pointer_rtx;
11463 amount = offsets->outgoing_args - offsets->saved_regs;
11466 if (amount)
11468 operands[1] = operands[0];
11469 operands[2] = GEN_INT (amount);
11470 output_add_immediate (operands);
11472 if (frame_pointer_needed)
11473 asm_fprintf (f, "\tmov\t%r, %r\n",
11474 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11476 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11478 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11479 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11480 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11481 reg, SP_REGNUM);
11483 else
11485 start_reg = FIRST_FPA_REGNUM;
11487 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11489 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11491 if (reg - start_reg == 3)
11493 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11494 start_reg, SP_REGNUM);
11495 start_reg = reg + 1;
11498 else
11500 if (reg != start_reg)
11501 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11502 start_reg, reg - start_reg,
11503 SP_REGNUM);
11505 start_reg = reg + 1;
11509 /* Just in case the last register checked also needs unstacking. */
11510 if (reg != start_reg)
11511 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11512 start_reg, reg - start_reg, SP_REGNUM);
11515 if (TARGET_HARD_FLOAT && TARGET_VFP)
11517 start_reg = FIRST_VFP_REGNUM;
11518 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11520 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11521 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11523 if (start_reg != reg)
11524 vfp_output_fldmd (f, SP_REGNUM,
11525 (start_reg - FIRST_VFP_REGNUM) / 2,
11526 (reg - start_reg) / 2);
11527 start_reg = reg + 2;
11530 if (start_reg != reg)
11531 vfp_output_fldmd (f, SP_REGNUM,
11532 (start_reg - FIRST_VFP_REGNUM) / 2,
11533 (reg - start_reg) / 2);
11535 if (TARGET_IWMMXT)
11536 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11537 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11538 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11540 /* If we can, restore the LR into the PC. */
11541 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11542 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11543 && !IS_STACKALIGN (func_type)
11544 && really_return
11545 && current_function_pretend_args_size == 0
11546 && saved_regs_mask & (1 << LR_REGNUM)
11547 && !current_function_calls_eh_return)
11549 saved_regs_mask &= ~ (1 << LR_REGNUM);
11550 saved_regs_mask |= (1 << PC_REGNUM);
11551 rfe = IS_INTERRUPT (func_type);
11553 else
11554 rfe = 0;
11556 /* Load the registers off the stack. If we only have one register
11557 to load use the LDR instruction - it is faster. For Thumb-2
11558 always use pop and the assembler will pick the best instruction. */
11559 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11560 && !IS_INTERRUPT (func_type))
11562 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11564 else if (saved_regs_mask)
11566 if (saved_regs_mask & (1 << SP_REGNUM))
11567 /* Note - write back to the stack register is not enabled
11568 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11569 in the list of registers and if we add writeback the
11570 instruction becomes UNPREDICTABLE. */
11571 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11572 rfe);
11573 else if (TARGET_ARM)
11574 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11575 rfe);
11576 else
11577 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11580 if (current_function_pretend_args_size)
11582 /* Unwind the pre-pushed regs. */
11583 operands[0] = operands[1] = stack_pointer_rtx;
11584 operands[2] = GEN_INT (current_function_pretend_args_size);
11585 output_add_immediate (operands);
11589 /* We may have already restored PC directly from the stack. */
11590 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11591 return "";
11593 /* Stack adjustment for exception handler. */
11594 if (current_function_calls_eh_return)
11595 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11596 ARM_EH_STACKADJ_REGNUM);
11598 /* Generate the return instruction. */
11599 switch ((int) ARM_FUNC_TYPE (func_type))
11601 case ARM_FT_ISR:
11602 case ARM_FT_FIQ:
11603 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11604 break;
11606 case ARM_FT_EXCEPTION:
11607 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11608 break;
11610 case ARM_FT_INTERWORKED:
11611 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11612 break;
11614 default:
11615 if (IS_STACKALIGN (func_type))
11617 /* See comment in arm_expand_prologue. */
11618 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11620 if (arm_arch5 || arm_arch4t)
11621 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11622 else
11623 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11624 break;
11627 return "";
11630 static void
11631 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11632 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11634 arm_stack_offsets *offsets;
11636 if (TARGET_THUMB1)
11638 int regno;
11640 /* Emit any call-via-reg trampolines that are needed for v4t support
11641 of call_reg and call_value_reg type insns. */
11642 for (regno = 0; regno < LR_REGNUM; regno++)
11644 rtx label = cfun->machine->call_via[regno];
11646 if (label != NULL)
11648 switch_to_section (function_section (current_function_decl));
11649 targetm.asm_out.internal_label (asm_out_file, "L",
11650 CODE_LABEL_NUMBER (label));
11651 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11655 /* ??? Probably not safe to set this here, since it assumes that a
11656 function will be emitted as assembly immediately after we generate
11657 RTL for it. This does not happen for inline functions. */
11658 return_used_this_function = 0;
11660 else /* TARGET_32BIT */
11662 /* We need to take into account any stack-frame rounding. */
11663 offsets = arm_get_frame_offsets ();
11665 gcc_assert (!use_return_insn (FALSE, NULL)
11666 || !return_used_this_function
11667 || offsets->saved_regs == offsets->outgoing_args
11668 || frame_pointer_needed);
11670 /* Reset the ARM-specific per-function variables. */
11671 after_arm_reorg = 0;
11675 /* Generate and emit an insn that we will recognize as a push_multi.
11676 Unfortunately, since this insn does not reflect very well the actual
11677 semantics of the operation, we need to annotate the insn for the benefit
11678 of DWARF2 frame unwind information. */
11679 static rtx
11680 emit_multi_reg_push (unsigned long mask)
11682 int num_regs = 0;
11683 int num_dwarf_regs;
11684 int i, j;
11685 rtx par;
11686 rtx dwarf;
11687 int dwarf_par_index;
11688 rtx tmp, reg;
11690 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11691 if (mask & (1 << i))
11692 num_regs++;
11694 gcc_assert (num_regs && num_regs <= 16);
11696 /* We don't record the PC in the dwarf frame information. */
11697 num_dwarf_regs = num_regs;
11698 if (mask & (1 << PC_REGNUM))
11699 num_dwarf_regs--;
11701 /* For the body of the insn we are going to generate an UNSPEC in
11702 parallel with several USEs. This allows the insn to be recognized
11703 by the push_multi pattern in the arm.md file. The insn looks
11704 something like this:
11706 (parallel [
11707 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11708 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11709 (use (reg:SI 11 fp))
11710 (use (reg:SI 12 ip))
11711 (use (reg:SI 14 lr))
11712 (use (reg:SI 15 pc))
11715 For the frame note however, we try to be more explicit and actually
11716 show each register being stored into the stack frame, plus a (single)
11717 decrement of the stack pointer. We do it this way in order to be
11718 friendly to the stack unwinding code, which only wants to see a single
11719 stack decrement per instruction. The RTL we generate for the note looks
11720 something like this:
11722 (sequence [
11723 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11724 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11725 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11726 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11727 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11730 This sequence is used both by the code to support stack unwinding for
11731 exceptions handlers and the code to generate dwarf2 frame debugging. */
11733 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11734 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11735 dwarf_par_index = 1;
11737 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11739 if (mask & (1 << i))
11741 reg = gen_rtx_REG (SImode, i);
11743 XVECEXP (par, 0, 0)
11744 = gen_rtx_SET (VOIDmode,
11745 gen_frame_mem (BLKmode,
11746 gen_rtx_PRE_DEC (BLKmode,
11747 stack_pointer_rtx)),
11748 gen_rtx_UNSPEC (BLKmode,
11749 gen_rtvec (1, reg),
11750 UNSPEC_PUSH_MULT));
11752 if (i != PC_REGNUM)
11754 tmp = gen_rtx_SET (VOIDmode,
11755 gen_frame_mem (SImode, stack_pointer_rtx),
11756 reg);
11757 RTX_FRAME_RELATED_P (tmp) = 1;
11758 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11759 dwarf_par_index++;
11762 break;
11766 for (j = 1, i++; j < num_regs; i++)
11768 if (mask & (1 << i))
11770 reg = gen_rtx_REG (SImode, i);
11772 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11774 if (i != PC_REGNUM)
11776 tmp
11777 = gen_rtx_SET (VOIDmode,
11778 gen_frame_mem (SImode,
11779 plus_constant (stack_pointer_rtx,
11780 4 * j)),
11781 reg);
11782 RTX_FRAME_RELATED_P (tmp) = 1;
11783 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11786 j++;
11790 par = emit_insn (par);
11792 tmp = gen_rtx_SET (VOIDmode,
11793 stack_pointer_rtx,
11794 plus_constant (stack_pointer_rtx, -4 * num_regs));
11795 RTX_FRAME_RELATED_P (tmp) = 1;
11796 XVECEXP (dwarf, 0, 0) = tmp;
11798 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11799 REG_NOTES (par));
11800 return par;
11803 /* Calculate the size of the return value that is passed in registers. */
11804 static unsigned
11805 arm_size_return_regs (void)
11807 enum machine_mode mode;
11809 if (current_function_return_rtx != 0)
11810 mode = GET_MODE (current_function_return_rtx);
11811 else
11812 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11814 return GET_MODE_SIZE (mode);
11817 static rtx
11818 emit_sfm (int base_reg, int count)
11820 rtx par;
11821 rtx dwarf;
11822 rtx tmp, reg;
11823 int i;
11825 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11826 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11828 reg = gen_rtx_REG (XFmode, base_reg++);
11830 XVECEXP (par, 0, 0)
11831 = gen_rtx_SET (VOIDmode,
11832 gen_frame_mem (BLKmode,
11833 gen_rtx_PRE_DEC (BLKmode,
11834 stack_pointer_rtx)),
11835 gen_rtx_UNSPEC (BLKmode,
11836 gen_rtvec (1, reg),
11837 UNSPEC_PUSH_MULT));
11838 tmp = gen_rtx_SET (VOIDmode,
11839 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11840 RTX_FRAME_RELATED_P (tmp) = 1;
11841 XVECEXP (dwarf, 0, 1) = tmp;
11843 for (i = 1; i < count; i++)
11845 reg = gen_rtx_REG (XFmode, base_reg++);
11846 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11848 tmp = gen_rtx_SET (VOIDmode,
11849 gen_frame_mem (XFmode,
11850 plus_constant (stack_pointer_rtx,
11851 i * 12)),
11852 reg);
11853 RTX_FRAME_RELATED_P (tmp) = 1;
11854 XVECEXP (dwarf, 0, i + 1) = tmp;
11857 tmp = gen_rtx_SET (VOIDmode,
11858 stack_pointer_rtx,
11859 plus_constant (stack_pointer_rtx, -12 * count));
11861 RTX_FRAME_RELATED_P (tmp) = 1;
11862 XVECEXP (dwarf, 0, 0) = tmp;
11864 par = emit_insn (par);
11865 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11866 REG_NOTES (par));
11867 return par;
11871 /* Return true if the current function needs to save/restore LR. */
11873 static bool
11874 thumb_force_lr_save (void)
11876 return !cfun->machine->lr_save_eliminated
11877 && (!leaf_function_p ()
11878 || thumb_far_jump_used_p ()
11879 || df_regs_ever_live_p (LR_REGNUM));
11883 /* Compute the distance from register FROM to register TO.
11884 These can be the arg pointer (26), the soft frame pointer (25),
11885 the stack pointer (13) or the hard frame pointer (11).
11886 In thumb mode r7 is used as the soft frame pointer, if needed.
11887 Typical stack layout looks like this:
11889 old stack pointer -> | |
11890 ----
11891 | | \
11892 | | saved arguments for
11893 | | vararg functions
11894 | | /
11896 hard FP & arg pointer -> | | \
11897 | | stack
11898 | | frame
11899 | | /
11901 | | \
11902 | | call saved
11903 | | registers
11904 soft frame pointer -> | | /
11906 | | \
11907 | | local
11908 | | variables
11909 locals base pointer -> | | /
11911 | | \
11912 | | outgoing
11913 | | arguments
11914 current stack pointer -> | | /
11917 For a given function some or all of these stack components
11918 may not be needed, giving rise to the possibility of
11919 eliminating some of the registers.
11921 The values returned by this function must reflect the behavior
11922 of arm_expand_prologue() and arm_compute_save_reg_mask().
11924 The sign of the number returned reflects the direction of stack
11925 growth, so the values are positive for all eliminations except
11926 from the soft frame pointer to the hard frame pointer.
11928 SFP may point just inside the local variables block to ensure correct
11929 alignment. */
11932 /* Calculate stack offsets. These are used to calculate register elimination
11933 offsets and in prologue/epilogue code. */
11935 static arm_stack_offsets *
11936 arm_get_frame_offsets (void)
11938 struct arm_stack_offsets *offsets;
11939 unsigned long func_type;
11940 int leaf;
11941 int saved;
11942 HOST_WIDE_INT frame_size;
11944 offsets = &cfun->machine->stack_offsets;
11946 /* We need to know if we are a leaf function. Unfortunately, it
11947 is possible to be called after start_sequence has been called,
11948 which causes get_insns to return the insns for the sequence,
11949 not the function, which will cause leaf_function_p to return
11950 the incorrect result.
11951 To work around this, we cache the computed frame offsets. We only need
11952 to know about leaf functions once reload has completed, and the
11953 frame size cannot be changed after that time, so we can safely
11954 use the cached value. */
11956 if (reload_completed)
11957 return offsets;
11959 /* Initially this is the size of the local variables. It will be translated
11960 into an offset once we have determined the size of preceding data. */
11961 frame_size = ROUND_UP_WORD (get_frame_size ());
11963 leaf = leaf_function_p ();
11965 /* Space for variadic functions. */
11966 offsets->saved_args = current_function_pretend_args_size;
11968 /* In Thumb mode this is incorrect, but never used. */
11969 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
11971 if (TARGET_32BIT)
11973 unsigned int regno;
11975 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
11977 /* We know that SP will be doubleword aligned on entry, and we must
11978 preserve that condition at any subroutine call. We also require the
11979 soft frame pointer to be doubleword aligned. */
11981 if (TARGET_REALLY_IWMMXT)
11983 /* Check for the call-saved iWMMXt registers. */
11984 for (regno = FIRST_IWMMXT_REGNUM;
11985 regno <= LAST_IWMMXT_REGNUM;
11986 regno++)
11987 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
11988 saved += 8;
11991 func_type = arm_current_func_type ();
11992 if (! IS_VOLATILE (func_type))
11994 /* Space for saved FPA registers. */
11995 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
11996 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
11997 saved += 12;
11999 /* Space for saved VFP registers. */
12000 if (TARGET_HARD_FLOAT && TARGET_VFP)
12001 saved += arm_get_vfp_saved_size ();
12004 else /* TARGET_THUMB1 */
12006 saved = bit_count (thumb1_compute_save_reg_mask ()) * 4;
12007 if (TARGET_BACKTRACE)
12008 saved += 16;
12011 /* Saved registers include the stack frame. */
12012 offsets->saved_regs = offsets->saved_args + saved;
12013 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12014 /* A leaf function does not need any stack alignment if it has nothing
12015 on the stack. */
12016 if (leaf && frame_size == 0)
12018 offsets->outgoing_args = offsets->soft_frame;
12019 offsets->locals_base = offsets->soft_frame;
12020 return offsets;
12023 /* Ensure SFP has the correct alignment. */
12024 if (ARM_DOUBLEWORD_ALIGN
12025 && (offsets->soft_frame & 7))
12026 offsets->soft_frame += 4;
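/* E.g. (sketch): soft_frame == 12 has (12 & 7) == 4, so it is bumped to
   16; a single 4-byte bump always suffices because every offset here is
   already a multiple of 4.  */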
12028 offsets->locals_base = offsets->soft_frame + frame_size;
12029 offsets->outgoing_args = (offsets->locals_base
12030 + current_function_outgoing_args_size);
12032 if (ARM_DOUBLEWORD_ALIGN)
12034 /* Ensure SP remains doubleword aligned. */
12035 if (offsets->outgoing_args & 7)
12036 offsets->outgoing_args += 4;
12037 gcc_assert (!(offsets->outgoing_args & 7));
12040 return offsets;
12044 /* Calculate the relative offsets for the different stack pointers. Positive
12045 offsets are in the direction of stack growth. */
12047 HOST_WIDE_INT
12048 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12050 arm_stack_offsets *offsets;
12052 offsets = arm_get_frame_offsets ();
12054 /* OK, now we have enough information to compute the distances.
12055 There must be an entry in these switch tables for each pair
12056 of registers in ELIMINABLE_REGS, even if some of the entries
12057 seem to be redundant or useless. */
12058 switch (from)
12060 case ARG_POINTER_REGNUM:
12061 switch (to)
12063 case THUMB_HARD_FRAME_POINTER_REGNUM:
12064 return 0;
12066 case FRAME_POINTER_REGNUM:
12067 /* This is the reverse of the soft frame pointer
12068 to hard frame pointer elimination below. */
12069 return offsets->soft_frame - offsets->saved_args;
12071 case ARM_HARD_FRAME_POINTER_REGNUM:
12072 /* If there is no stack frame then the hard
12073 frame pointer and the arg pointer coincide. */
12074 if (offsets->frame == offsets->saved_regs)
12075 return 0;
12076 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12077 return (frame_pointer_needed
12078 && cfun->static_chain_decl != NULL
12079 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12081 case STACK_POINTER_REGNUM:
12082 /* If nothing has been pushed on the stack at all
12083 then this will return -4. This *is* correct! */
12084 return offsets->outgoing_args - (offsets->saved_args + 4);
12086 default:
12087 gcc_unreachable ();
12089 gcc_unreachable ();
12091 case FRAME_POINTER_REGNUM:
12092 switch (to)
12094 case THUMB_HARD_FRAME_POINTER_REGNUM:
12095 return 0;
12097 case ARM_HARD_FRAME_POINTER_REGNUM:
12098 /* The hard frame pointer points to the top entry in the
12099 stack frame. The soft frame pointer to the bottom entry
12100 in the stack frame. If there is no stack frame at all,
12101 then they are identical. */
12103 return offsets->frame - offsets->soft_frame;
12105 case STACK_POINTER_REGNUM:
12106 return offsets->outgoing_args - offsets->soft_frame;
12108 default:
12109 gcc_unreachable ();
12111 gcc_unreachable ();
12113 default:
12114 /* You cannot eliminate from the stack pointer.
12115 In theory you could eliminate from the hard frame
12116 pointer to the stack pointer, but this will never
12117 happen, since if a stack frame is not needed the
12118 hard frame pointer will never be used. */
12119 gcc_unreachable ();
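/* Continuing the hypothetical numbers used above (saved_args == 0,
   soft_frame == 16, outgoing_args == 48): eliminating the arg pointer
   into SP returns 48 - (0 + 4) = 44, and eliminating the soft frame
   pointer into SP returns 48 - 16 = 32.  */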
12124 /* Emit RTL to save coprocessor registers on function entry. Returns the
12125 number of bytes pushed. */
12127 static int
12128 arm_save_coproc_regs(void)
12130 int saved_size = 0;
12131 unsigned reg;
12132 unsigned start_reg;
12133 rtx insn;
12135 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12136 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12138 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12139 insn = gen_rtx_MEM (V2SImode, insn);
12140 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12141 RTX_FRAME_RELATED_P (insn) = 1;
12142 saved_size += 8;
12145 /* Save any floating point call-saved registers used by this
12146 function. */
12147 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12149 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12150 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12152 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12153 insn = gen_rtx_MEM (XFmode, insn);
12154 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12155 RTX_FRAME_RELATED_P (insn) = 1;
12156 saved_size += 12;
12159 else
12161 start_reg = LAST_FPA_REGNUM;
12163 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12165 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12167 if (start_reg - reg == 3)
12169 insn = emit_sfm (reg, 4);
12170 RTX_FRAME_RELATED_P (insn) = 1;
12171 saved_size += 48;
12172 start_reg = reg - 1;
12175 else
12177 if (start_reg != reg)
12179 insn = emit_sfm (reg + 1, start_reg - reg);
12180 RTX_FRAME_RELATED_P (insn) = 1;
12181 saved_size += (start_reg - reg) * 12;
12183 start_reg = reg - 1;
12187 if (start_reg != reg)
12189 insn = emit_sfm (reg + 1, start_reg - reg);
12190 saved_size += (start_reg - reg) * 12;
12191 RTX_FRAME_RELATED_P (insn) = 1;
12194 if (TARGET_HARD_FLOAT && TARGET_VFP)
12196 start_reg = FIRST_VFP_REGNUM;
12198 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12200 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12201 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12203 if (start_reg != reg)
12204 saved_size += vfp_emit_fstmd (start_reg,
12205 (reg - start_reg) / 2);
12206 start_reg = reg + 2;
12209 if (start_reg != reg)
12210 saved_size += vfp_emit_fstmd (start_reg,
12211 (reg - start_reg) / 2);
12213 return saved_size;
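/* Sketch of the FPA grouping above, with liveness assumed purely for
   illustration: if only f4 and f5 are live and call-saved, the
   descending scan emits a single two-register SFM once it steps past
   the end of the run; a run of four live registers is flushed as soon
   as start_reg - reg == 3, since SFM stores at most four registers.  */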
12217 /* Set the Thumb frame pointer from the stack pointer. */
12219 static void
12220 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12222 HOST_WIDE_INT amount;
12223 rtx insn, dwarf;
12225 amount = offsets->outgoing_args - offsets->locals_base;
12226 if (amount < 1024)
12227 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12228 stack_pointer_rtx, GEN_INT (amount)));
12229 else
12231 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12232 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12233 hard_frame_pointer_rtx,
12234 stack_pointer_rtx));
12235 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12236 plus_constant (stack_pointer_rtx, amount));
12237 RTX_FRAME_RELATED_P (dwarf) = 1;
12238 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12239 REG_NOTES (insn));
12242 RTX_FRAME_RELATED_P (insn) = 1;
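/* The two paths above correspond roughly to the following (illustrative
   asm; r7 is the Thumb frame pointer):

       add  r7, sp, #N          @ amount < 1024: one instruction

   versus, for larger frames:

       mov  r7, #N              @ materialize the offset first
       add  r7, r7, sp          @ then add SP; the REG_FRAME_RELATED_EXPR
                                @ note describes this pair as r7 = sp + N  */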
12245 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12246 function. */
12247 void
12248 arm_expand_prologue (void)
12250 rtx amount;
12251 rtx insn;
12252 rtx ip_rtx;
12253 unsigned long live_regs_mask;
12254 unsigned long func_type;
12255 int fp_offset = 0;
12256 int saved_pretend_args = 0;
12257 int saved_regs = 0;
12258 unsigned HOST_WIDE_INT args_to_push;
12259 arm_stack_offsets *offsets;
12261 func_type = arm_current_func_type ();
12263 /* Naked functions don't have prologues. */
12264 if (IS_NAKED (func_type))
12265 return;
12267 /* Make a copy of current_function_pretend_args_size as we may need to modify it locally. */
12268 args_to_push = current_function_pretend_args_size;
12270 /* Compute which registers we will have to save onto the stack. */
12271 live_regs_mask = arm_compute_save_reg_mask ();
12273 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12275 if (IS_STACKALIGN (func_type))
12277 rtx dwarf;
12278 rtx r0;
12279 rtx r1;
12280 /* Handle a word-aligned stack pointer. We generate the following:
12282 mov r0, sp
12283 bic r1, r0, #7
12284 mov sp, r1
12285 <save and restore r0 in normal prologue/epilogue>
12286 mov sp, r0
12287 bx lr
12289 The unwinder doesn't need to know about the stack realignment.
12290 Just tell it we saved SP in r0. */
12291 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12293 r0 = gen_rtx_REG (SImode, 0);
12294 r1 = gen_rtx_REG (SImode, 1);
12295 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12296 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12297 insn = gen_movsi (r0, stack_pointer_rtx);
12298 RTX_FRAME_RELATED_P (insn) = 1;
12299 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12300 dwarf, REG_NOTES (insn));
12301 emit_insn (insn);
12302 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12303 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12306 if (frame_pointer_needed && TARGET_ARM)
12308 if (IS_INTERRUPT (func_type))
12310 /* Interrupt functions must not corrupt any registers.
12311 Creating a frame pointer however, corrupts the IP
12312 register, so we must push it first. */
12313 insn = emit_multi_reg_push (1 << IP_REGNUM);
12315 /* Do not set RTX_FRAME_RELATED_P on this insn.
12316 The dwarf stack unwinding code only wants to see one
12317 stack decrement per function, and this is not it. If
12318 this instruction is labeled as being part of the frame
12319 creation sequence then dwarf2out_frame_debug_expr will
12320 die when it encounters the assignment of IP to FP
12321 later on, since the use of SP here establishes SP as
12322 the CFA register and not IP.
12324 Anyway this instruction is not really part of the stack
12325 frame creation although it is part of the prologue. */
12327 else if (IS_NESTED (func_type))
12329 /* The static chain register is the same as the IP register,
12330 which is used as a scratch register during stack frame creation.
12331 To get around this we need to find somewhere to store IP
12332 whilst the frame is being created. We try the following
12333 places in order:
12335 1. The last argument register.
12336 2. A slot on the stack above the frame. (This only
12337 works if the function is not a varargs function).
12338 3. Register r3, after pushing the argument registers
12339 onto the stack.
12341 Note - we only need to tell the dwarf2 backend about the SP
12342 adjustment in the second variant; the static chain register
12343 doesn't need to be unwound, as it doesn't contain a value
12344 inherited from the caller. */
12346 if (df_regs_ever_live_p (3) == false)
12347 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12348 else if (args_to_push == 0)
12350 rtx dwarf;
12352 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12353 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12354 fp_offset = 4;
12356 /* Just tell the dwarf backend that we adjusted SP. */
12357 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12358 plus_constant (stack_pointer_rtx,
12359 -fp_offset));
12360 RTX_FRAME_RELATED_P (insn) = 1;
12361 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12362 dwarf, REG_NOTES (insn));
12364 else
12366 /* Store the args on the stack. */
12367 if (cfun->machine->uses_anonymous_args)
12368 insn = emit_multi_reg_push
12369 ((0xf0 >> (args_to_push / 4)) & 0xf);
12370 else
12371 insn = emit_insn
12372 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12373 GEN_INT (- args_to_push)));
12375 RTX_FRAME_RELATED_P (insn) = 1;
12377 saved_pretend_args = 1;
12378 fp_offset = args_to_push;
12379 args_to_push = 0;
12381 /* Now reuse r3 to preserve IP. */
12382 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12386 insn = emit_set_insn (ip_rtx,
12387 plus_constant (stack_pointer_rtx, fp_offset));
12388 RTX_FRAME_RELATED_P (insn) = 1;
12391 if (args_to_push)
12393 /* Push the argument registers, or reserve space for them. */
12394 if (cfun->machine->uses_anonymous_args)
12395 insn = emit_multi_reg_push
12396 ((0xf0 >> (args_to_push / 4)) & 0xf);
12397 else
12398 insn = emit_insn
12399 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12400 GEN_INT (- args_to_push)));
12401 RTX_FRAME_RELATED_P (insn) = 1;
12404 /* If this is an interrupt service routine, and the link register
12405 is going to be pushed, and we are not creating a stack frame,
12406 (which would involve an extra push of IP and a pop in the epilogue)
12407 subtracting four from LR now will mean that the function return
12408 can be done with a single instruction. */
12409 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12410 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12411 && ! frame_pointer_needed
12412 && TARGET_ARM)
12414 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12416 emit_set_insn (lr, plus_constant (lr, -4));
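/* With LR pre-adjusted, the epilogue can return from the ISR with a
   single "ldmfd sp!, {..., pc}^" instead of popping LR and then issuing
   the usual "subs pc, lr, #4" exception return (illustrative; the exact
   epilogue depends on which registers were saved).  */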
12419 if (live_regs_mask)
12421 insn = emit_multi_reg_push (live_regs_mask);
12422 saved_regs += bit_count (live_regs_mask) * 4;
12423 RTX_FRAME_RELATED_P (insn) = 1;
12426 if (! IS_VOLATILE (func_type))
12427 saved_regs += arm_save_coproc_regs ();
12429 if (frame_pointer_needed && TARGET_ARM)
12431 /* Create the new frame pointer. */
12433 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12434 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12435 RTX_FRAME_RELATED_P (insn) = 1;
12437 if (IS_NESTED (func_type))
12439 /* Recover the static chain register. */
12440 if (!df_regs_ever_live_p (3)
12441 || saved_pretend_args)
12442 insn = gen_rtx_REG (SImode, 3);
12443 else /* if (current_function_pretend_args_size == 0) */
12445 insn = plus_constant (hard_frame_pointer_rtx, 4);
12446 insn = gen_frame_mem (SImode, insn);
12448 emit_set_insn (ip_rtx, insn);
12449 /* Add a USE to stop propagate_one_insn() from barfing. */
12450 emit_insn (gen_prologue_use (ip_rtx));
12455 offsets = arm_get_frame_offsets ();
12456 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12458 /* This add can produce multiple insns for a large constant, so we
12459 need to get tricky. */
12460 rtx last = get_last_insn ();
12462 amount = GEN_INT (offsets->saved_args + saved_regs
12463 - offsets->outgoing_args);
12465 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12466 amount));
12468 do
12469 last = last ? NEXT_INSN (last) : get_insns ();
12470 RTX_FRAME_RELATED_P (last) = 1;
12472 while (last != insn);
12474 /* If the frame pointer is needed, emit a special barrier that
12475 will prevent the scheduler from moving stores to the frame
12476 before the stack adjustment. */
12477 if (frame_pointer_needed)
12478 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12479 hard_frame_pointer_rtx));
12483 if (frame_pointer_needed && TARGET_THUMB2)
12484 thumb_set_frame_pointer (offsets);
12486 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12488 unsigned long mask;
12490 mask = live_regs_mask;
12491 mask &= THUMB2_WORK_REGS;
12492 if (!IS_NESTED (func_type))
12493 mask |= (1 << IP_REGNUM);
12494 arm_load_pic_register (mask);
12497 /* If we are profiling, make sure no instructions are scheduled before
12498 the call to mcount. Similarly if the user has requested no
12499 scheduling in the prolog. Similarly if we want non-call exceptions
12500 using the EABI unwinder, to prevent faulting instructions from being
12501 swapped with a stack adjustment. */
12502 if (current_function_profile || !TARGET_SCHED_PROLOG
12503 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12504 emit_insn (gen_blockage ());
12506 /* If the link register is being kept alive, with the return address in it,
12507 then make sure that it does not get reused by the ce2 pass. */
12508 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12509 cfun->machine->lr_save_eliminated = 1;
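/* Putting the pieces together, the prologue emitted for a small ARM
   function that needs a frame pointer classically looks like this
   (sketch only; the exact sequence depends on target flags and on the
   registers in live_regs_mask):

       mov   ip, sp
       stmfd sp!, {fp, ip, lr, pc}
       sub   fp, ip, #4
       sub   sp, sp, #LOCALS  */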
12512 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12513 static void
12514 arm_print_condition (FILE *stream)
12516 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12518 /* Branch conversion is not implemented for Thumb-2. */
12519 if (TARGET_THUMB)
12521 output_operand_lossage ("predicated Thumb instruction");
12522 return;
12524 if (current_insn_predicate != NULL)
12526 output_operand_lossage
12527 ("predicated instruction in conditional sequence");
12528 return;
12531 fputs (arm_condition_codes[arm_current_cc], stream);
12533 else if (current_insn_predicate)
12535 enum arm_cond_code code;
12537 if (TARGET_THUMB1)
12539 output_operand_lossage ("predicated Thumb instruction");
12540 return;
12543 code = get_arm_condition_code (current_insn_predicate);
12544 fputs (arm_condition_codes[code], stream);
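/* So inside a ccfsm or predicated sequence, a template such as "add%?"
   reaches this function through the '?' case of arm_print_operand below
   and comes out as, e.g., "addeq" (illustrative).  */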
12549 /* If CODE is 'd', then the X is a condition operand and the instruction
12550 should only be executed if the condition is true.
12551 if CODE is 'D', then the X is a condition operand and the instruction
12552 should only be executed if the condition is false: however, if the mode
12553 of the comparison is CCFPEmode, then always execute the instruction -- we
12554 do this because in these circumstances !GE does not necessarily imply LT;
12555 in these cases the instruction pattern will take care to make sure that
12556 an instruction containing %d will follow, thereby undoing the effects of
12557 doing this instruction unconditionally.
12558 If CODE is 'N' then X is a floating point operand that must be negated
12559 before output.
12560 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12561 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
12562 void
12563 arm_print_operand (FILE *stream, rtx x, int code)
12565 switch (code)
12567 case '@':
12568 fputs (ASM_COMMENT_START, stream);
12569 return;
12571 case '_':
12572 fputs (user_label_prefix, stream);
12573 return;
12575 case '|':
12576 fputs (REGISTER_PREFIX, stream);
12577 return;
12579 case '?':
12580 arm_print_condition (stream);
12581 return;
12583 case '(':
12584 /* Nothing in unified syntax, otherwise the current condition code. */
12585 if (!TARGET_UNIFIED_ASM)
12586 arm_print_condition (stream);
12587 break;
12589 case ')':
12590 /* The current condition code in unified syntax, otherwise nothing. */
12591 if (TARGET_UNIFIED_ASM)
12592 arm_print_condition (stream);
12593 break;
12595 case '.':
12596 /* The current condition code for a condition code setting instruction.
12597 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12598 if (TARGET_UNIFIED_ASM)
12600 fputc('s', stream);
12601 arm_print_condition (stream);
12603 else
12605 arm_print_condition (stream);
12606 fputc('s', stream);
12608 return;
12610 case '!':
12611 /* If the instruction is conditionally executed then print
12612 the current condition code, otherwise print 's'. */
12613 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12614 if (current_insn_predicate)
12615 arm_print_condition (stream);
12616 else
12617 fputc('s', stream);
12618 break;
12620 /* %# is a "break" sequence. It doesn't output anything, but is used to
12621 separate e.g. operand numbers from following text, if that text consists
12622 of further digits which we don't want to be part of the operand
12623 number. */
12624 case '#':
12625 return;
12627 case 'N':
12629 REAL_VALUE_TYPE r;
12630 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12631 r = REAL_VALUE_NEGATE (r);
12632 fprintf (stream, "%s", fp_const_from_val (&r));
12634 return;
12636 /* An integer without a preceding # sign. */
12637 case 'c':
12638 gcc_assert (GET_CODE (x) == CONST_INT);
12639 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12640 return;
12642 case 'B':
12643 if (GET_CODE (x) == CONST_INT)
12645 HOST_WIDE_INT val;
12646 val = ARM_SIGN_EXTEND (~INTVAL (x));
12647 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12649 else
12651 putc ('~', stream);
12652 output_addr_const (stream, x);
12654 return;
12656 case 'L':
12657 /* The low 16 bits of an immediate constant. */
12658 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12659 return;
12661 case 'i':
12662 fprintf (stream, "%s", arithmetic_instr (x, 1));
12663 return;
12665 /* Truncate Cirrus shift counts. */
12666 case 's':
12667 if (GET_CODE (x) == CONST_INT)
12669 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12670 return;
12672 arm_print_operand (stream, x, 0);
12673 return;
12675 case 'I':
12676 fprintf (stream, "%s", arithmetic_instr (x, 0));
12677 return;
12679 case 'S':
12681 HOST_WIDE_INT val;
12682 const char *shift;
12684 if (!shift_operator (x, SImode))
12686 output_operand_lossage ("invalid shift operand");
12687 break;
12690 shift = shift_op (x, &val);
12692 if (shift)
12694 fprintf (stream, ", %s ", shift);
12695 if (val == -1)
12696 arm_print_operand (stream, XEXP (x, 1), 0);
12697 else
12698 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12701 return;
12703 /* An explanation of the 'Q', 'R' and 'H' register operands:
12705 In a pair of registers containing a DI or DF value the 'Q'
12706 operand returns the register number of the register containing
12707 the least significant part of the value. The 'R' operand returns
12708 the register number of the register containing the most
12709 significant part of the value.
12711 The 'H' operand returns the higher of the two register numbers.
12712 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12713 same as the 'Q' operand, since the most significant part of the
12714 value is held in the lower-numbered register. The reverse is true
12715 on systems where WORDS_BIG_ENDIAN is false.
12717 The purpose of these operands is to distinguish between cases
12718 where the endian-ness of the values is important (for example
12719 when they are added together), and cases where the endian-ness
12720 is irrelevant, but the order of register operations is important.
12721 For example when loading a value from memory into a register
12722 pair, the endian-ness does not matter. Provided that the value
12723 from the lower memory address is put into the lower numbered
12724 register, and the value from the higher address is put into the
12725 higher numbered register, the load will work regardless of whether
12726 the value being loaded is big-wordian or little-wordian. The
12727 order of the two register loads can matter however, if the address
12728 of the memory location is actually held in one of the registers
12729 being overwritten by the load. */
12730 case 'Q':
12731 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12733 output_operand_lossage ("invalid operand for code '%c'", code);
12734 return;
12737 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12738 return;
12740 case 'R':
12741 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12743 output_operand_lossage ("invalid operand for code '%c'", code);
12744 return;
12747 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12748 return;
12750 case 'H':
12751 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12753 output_operand_lossage ("invalid operand for code '%c'", code);
12754 return;
12757 asm_fprintf (stream, "%r", REGNO (x) + 1);
12758 return;
12760 case 'J':
12761 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12763 output_operand_lossage ("invalid operand for code '%c'", code);
12764 return;
12767 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12768 return;
12770 case 'K':
12771 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12773 output_operand_lossage ("invalid operand for code '%c'", code);
12774 return;
12777 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12778 return;
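/* Example for the register-pair codes above (illustrative): for a DImode
   value held in {r2, r3} on a little-endian target, %Q prints r2, %R
   prints r3 and %H prints r3; with WORDS_BIG_ENDIAN, %Q prints r3 and
   %R prints r2, while %H still prints r3 -- always the higher register
   number.  */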
12780 case 'm':
12781 asm_fprintf (stream, "%r",
12782 GET_CODE (XEXP (x, 0)) == REG
12783 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12784 return;
12786 case 'M':
12787 asm_fprintf (stream, "{%r-%r}",
12788 REGNO (x),
12789 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12790 return;
12792 /* Like 'M', but writing doubleword vector registers, for use by Neon
12793 insns. */
12794 case 'h':
12796 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12797 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12798 if (numregs == 1)
12799 asm_fprintf (stream, "{d%d}", regno);
12800 else
12801 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12803 return;
12805 case 'd':
12806 /* CONST_TRUE_RTX means always -- that's the default. */
12807 if (x == const_true_rtx)
12808 return;
12810 if (!COMPARISON_P (x))
12812 output_operand_lossage ("invalid operand for code '%c'", code);
12813 return;
12816 fputs (arm_condition_codes[get_arm_condition_code (x)],
12817 stream);
12818 return;
12820 case 'D':
12821 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12822 want to do that. */
12823 if (x == const_true_rtx)
12825 output_operand_lossage ("instruction never executed");
12826 return;
12828 if (!COMPARISON_P (x))
12830 output_operand_lossage ("invalid operand for code '%c'", code);
12831 return;
12834 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12835 (get_arm_condition_code (x))],
12836 stream);
12837 return;
12839 /* Cirrus registers can be accessed in a variety of ways:
12840 single floating point (f)
12841 double floating point (d)
12842 32bit integer (fx)
12843 64bit integer (dx). */
12844 case 'W': /* Cirrus register in F mode. */
12845 case 'X': /* Cirrus register in D mode. */
12846 case 'Y': /* Cirrus register in FX mode. */
12847 case 'Z': /* Cirrus register in DX mode. */
12848 gcc_assert (GET_CODE (x) == REG
12849 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12851 fprintf (stream, "mv%s%s",
12852 code == 'W' ? "f"
12853 : code == 'X' ? "d"
12854 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12856 return;
12858 /* Print cirrus register in the mode specified by the register's mode. */
12859 case 'V':
12861 int mode = GET_MODE (x);
12863 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12865 output_operand_lossage ("invalid operand for code '%c'", code);
12866 return;
12869 fprintf (stream, "mv%s%s",
12870 mode == DFmode ? "d"
12871 : mode == SImode ? "fx"
12872 : mode == DImode ? "dx"
12873 : "f", reg_names[REGNO (x)] + 2);
12875 return;
12878 case 'U':
12879 if (GET_CODE (x) != REG
12880 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
12881 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
12882 /* Bad value for wCG register number. */
12884 output_operand_lossage ("invalid operand for code '%c'", code);
12885 return;
12888 else
12889 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
12890 return;
12892 /* Print an iWMMXt control register name. */
12893 case 'w':
12894 if (GET_CODE (x) != CONST_INT
12895 || INTVAL (x) < 0
12896 || INTVAL (x) >= 16)
12897 /* Bad value for wC register number. */
12899 output_operand_lossage ("invalid operand for code '%c'", code);
12900 return;
12903 else
12905 static const char * wc_reg_names [16] =
12907 "wCID", "wCon", "wCSSF", "wCASF",
12908 "wC4", "wC5", "wC6", "wC7",
12909 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
12910 "wC12", "wC13", "wC14", "wC15"
12913 fputs (wc_reg_names [INTVAL (x)], stream);
12915 return;
12917 /* Print a VFP/Neon double precision or quad precision register name. */
12918 case 'P':
12919 case 'q':
12921 int mode = GET_MODE (x);
12922 int is_quad = (code == 'q');
12923 int regno;
12925 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
12927 output_operand_lossage ("invalid operand for code '%c'", code);
12928 return;
12931 if (GET_CODE (x) != REG
12932 || !IS_VFP_REGNUM (REGNO (x)))
12934 output_operand_lossage ("invalid operand for code '%c'", code);
12935 return;
12938 regno = REGNO (x);
12939 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
12940 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
12942 output_operand_lossage ("invalid operand for code '%c'", code);
12943 return;
12946 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
12947 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
12949 return;
12951 /* These two codes print the low/high doubleword register of a Neon quad
12952 register, respectively. For pair-structure types, can also print
12953 low/high quadword registers. */
12954 case 'e':
12955 case 'f':
12957 int mode = GET_MODE (x);
12958 int regno;
12960 if ((GET_MODE_SIZE (mode) != 16
12961 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
12963 output_operand_lossage ("invalid operand for code '%c'", code);
12964 return;
12967 regno = REGNO (x);
12968 if (!NEON_REGNO_OK_FOR_QUAD (regno))
12970 output_operand_lossage ("invalid operand for code '%c'", code);
12971 return;
12974 if (GET_MODE_SIZE (mode) == 16)
12975 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
12976 + (code == 'f' ? 1 : 0));
12977 else
12978 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
12979 + (code == 'f' ? 1 : 0));
12981 return;
12983 /* Print a VFPv3 floating-point constant, represented as an integer
12984 index. */
12985 case 'G':
12987 int index = vfp3_const_double_index (x);
12988 gcc_assert (index != -1);
12989 fprintf (stream, "%d", index);
12991 return;
12993 /* Print bits representing opcode features for Neon.
12995 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
12996 and polynomials as unsigned.
12998 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
13000 Bit 2 is 1 for rounding functions, 0 otherwise. */
13002 /* Identify the type as 's', 'u', 'p' or 'f'. */
13003 case 'T':
13005 HOST_WIDE_INT bits = INTVAL (x);
13006 fputc ("uspf"[bits & 3], stream);
13008 return;
13010 /* Likewise, but signed and unsigned integers are both 'i'. */
13011 case 'F':
13013 HOST_WIDE_INT bits = INTVAL (x);
13014 fputc ("iipf"[bits & 3], stream);
13016 return;
13018 /* As for 'T', but emit 'u' instead of 'p'. */
13019 case 't':
13021 HOST_WIDE_INT bits = INTVAL (x);
13022 fputc ("usuf"[bits & 3], stream);
13024 return;
13026 /* Bit 2: rounding (vs none). */
13027 case 'O':
13029 HOST_WIDE_INT bits = INTVAL (x);
13030 fputs ((bits & 4) != 0 ? "r" : "", stream);
13032 return;
13034 default:
13035 if (x == 0)
13037 output_operand_lossage ("missing operand");
13038 return;
13041 switch (GET_CODE (x))
13043 case REG:
13044 asm_fprintf (stream, "%r", REGNO (x));
13045 break;
13047 case MEM:
13048 output_memory_reference_mode = GET_MODE (x);
13049 output_address (XEXP (x, 0));
13050 break;
13052 case CONST_DOUBLE:
13053 if (TARGET_NEON)
13055 char fpstr[20];
13056 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13057 sizeof (fpstr), 0, 1);
13058 fprintf (stream, "#%s", fpstr);
13060 else
13061 fprintf (stream, "#%s", fp_immediate_constant (x));
13062 break;
13064 default:
13065 gcc_assert (GET_CODE (x) != NEG);
13066 fputc ('#', stream);
13067 output_addr_const (stream, x);
13068 break;
13073 /* Target hook for assembling integer objects. The ARM version needs to
13074 handle word-sized values specially. */
13075 static bool
13076 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13078 enum machine_mode mode;
13080 if (size == UNITS_PER_WORD && aligned_p)
13082 fputs ("\t.word\t", asm_out_file);
13083 output_addr_const (asm_out_file, x);
13085 /* Mark symbols as position independent. We only do this in the
13086 .text segment, not in the .data segment. */
13087 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13088 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13090 /* See legitimize_pic_address for an explanation of the
13091 TARGET_VXWORKS_RTP check. */
13092 if (TARGET_VXWORKS_RTP
13093 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13094 fputs ("(GOT)", asm_out_file);
13095 else
13096 fputs ("(GOTOFF)", asm_out_file);
13098 fputc ('\n', asm_out_file);
13099 return true;
13102 mode = GET_MODE (x);
13104 if (arm_vector_mode_supported_p (mode))
13106 int i, units;
13107 unsigned int invmask = 0, parts_per_word;
13109 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13111 units = CONST_VECTOR_NUNITS (x);
13112 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13114 /* For big-endian Neon vectors, we must permute the vector to the form
13115 which, when loaded by a VLDR or VLDM instruction, will give a vector
13116 with the elements in the right order. */
13117 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13119 parts_per_word = UNITS_PER_WORD / size;
13120 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13121 support those anywhere yet. */
13122 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
13125 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13126 for (i = 0; i < units; i++)
13128 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13129 assemble_integer
13130 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13132 else
13133 for (i = 0; i < units; i++)
13135 rtx elt = CONST_VECTOR_ELT (x, i);
13136 REAL_VALUE_TYPE rval;
13138 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13140 assemble_real
13141 (rval, GET_MODE_INNER (mode),
13142 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13145 return true;
13148 return default_assemble_integer (x, size, aligned_p);
13151 static void
13152 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13154 section *s;
13156 if (!TARGET_AAPCS_BASED)
13158 (is_ctor ?
13159 default_named_section_asm_out_constructor
13160 : default_named_section_asm_out_destructor) (symbol, priority);
13161 return;
13164 /* Put these in the .init_array section, using a special relocation. */
13165 if (priority != DEFAULT_INIT_PRIORITY)
13167 char buf[18];
13168 sprintf (buf, "%s.%.5u",
13169 is_ctor ? ".init_array" : ".fini_array",
13170 priority);
13171 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13173 else if (is_ctor)
13174 s = ctors_section;
13175 else
13176 s = dtors_section;
13178 switch_to_section (s);
13179 assemble_align (POINTER_SIZE);
13180 fputs ("\t.word\t", asm_out_file);
13181 output_addr_const (asm_out_file, symbol);
13182 fputs ("(target1)\n", asm_out_file);
13185 /* Add a function to the list of static constructors. */
13187 static void
13188 arm_elf_asm_constructor (rtx symbol, int priority)
13190 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13193 /* Add a function to the list of static destructors. */
13195 static void
13196 arm_elf_asm_destructor (rtx symbol, int priority)
13198 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13201 /* A finite state machine takes care of noticing whether or not instructions
13202 can be conditionally executed, and thus decrease execution time and code
13203 size by deleting branch instructions. The fsm is controlled by
13204 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13206 /* The states of the fsm controlling condition codes are:
13207 0: normal, do nothing special
13208 1: make ASM_OUTPUT_OPCODE not output this instruction
13209 2: make ASM_OUTPUT_OPCODE not output this instruction
13210 3: make instructions conditional
13211 4: make instructions conditional
13213 State transitions (state->state by whom under condition):
13214 0 -> 1 final_prescan_insn if the `target' is a label
13215 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13216 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13217 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13218 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13219 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13220 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13221 (the target insn is arm_target_insn).
13223 If the jump clobbers the conditions then we use states 2 and 4.
13225 A similar thing can be done with conditional return insns.
13227 XXX In case the `target' is an unconditional branch, this conditionalising
13228 of the instructions always reduces code size, but not always execution
13229 time. But then, I want to reduce the code size to somewhere near what
13230 /bin/cc produces. */
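/* A sketch of the transformation the fsm enables (illustrative):

       cmp   r0, #0                     cmp   r0, #0
       beq   .L1            becomes     movne r1, #1
       mov   r1, #1
     .L1:                               (branch and label deleted)  */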
13232 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13233 instructions. When a COND_EXEC instruction is seen the subsequent
13234 instructions are scanned so that multiple conditional instructions can be
13235 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13236 specify the length and true/false mask for the IT block. These will be
13237 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
13239 /* Returns the index of the ARM condition code string in
13240 `arm_condition_codes'. COMPARISON should be an rtx like
13241 `(eq (...) (...))'. */
13242 static enum arm_cond_code
13243 get_arm_condition_code (rtx comparison)
13245 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13246 int code;
13247 enum rtx_code comp_code = GET_CODE (comparison);
13249 if (GET_MODE_CLASS (mode) != MODE_CC)
13250 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13251 XEXP (comparison, 1));
13253 switch (mode)
13255 case CC_DNEmode: code = ARM_NE; goto dominance;
13256 case CC_DEQmode: code = ARM_EQ; goto dominance;
13257 case CC_DGEmode: code = ARM_GE; goto dominance;
13258 case CC_DGTmode: code = ARM_GT; goto dominance;
13259 case CC_DLEmode: code = ARM_LE; goto dominance;
13260 case CC_DLTmode: code = ARM_LT; goto dominance;
13261 case CC_DGEUmode: code = ARM_CS; goto dominance;
13262 case CC_DGTUmode: code = ARM_HI; goto dominance;
13263 case CC_DLEUmode: code = ARM_LS; goto dominance;
13264 case CC_DLTUmode: code = ARM_CC;
13266 dominance:
13267 gcc_assert (comp_code == EQ || comp_code == NE);
13269 if (comp_code == EQ)
13270 return ARM_INVERSE_CONDITION_CODE (code);
13271 return code;
13273 case CC_NOOVmode:
13274 switch (comp_code)
13276 case NE: return ARM_NE;
13277 case EQ: return ARM_EQ;
13278 case GE: return ARM_PL;
13279 case LT: return ARM_MI;
13280 default: gcc_unreachable ();
13283 case CC_Zmode:
13284 switch (comp_code)
13286 case NE: return ARM_NE;
13287 case EQ: return ARM_EQ;
13288 default: gcc_unreachable ();
13291 case CC_Nmode:
13292 switch (comp_code)
13294 case NE: return ARM_MI;
13295 case EQ: return ARM_PL;
13296 default: gcc_unreachable ();
13299 case CCFPEmode:
13300 case CCFPmode:
13301 /* These encodings assume that AC=1 in the FPA system control
13302 byte. This allows us to handle all cases except UNEQ and
13303 LTGT. */
13304 switch (comp_code)
13306 case GE: return ARM_GE;
13307 case GT: return ARM_GT;
13308 case LE: return ARM_LS;
13309 case LT: return ARM_MI;
13310 case NE: return ARM_NE;
13311 case EQ: return ARM_EQ;
13312 case ORDERED: return ARM_VC;
13313 case UNORDERED: return ARM_VS;
13314 case UNLT: return ARM_LT;
13315 case UNLE: return ARM_LE;
13316 case UNGT: return ARM_HI;
13317 case UNGE: return ARM_PL;
13318 /* UNEQ and LTGT do not have a representation. */
13319 case UNEQ: /* Fall through. */
13320 case LTGT: /* Fall through. */
13321 default: gcc_unreachable ();
13324 case CC_SWPmode:
13325 switch (comp_code)
13327 case NE: return ARM_NE;
13328 case EQ: return ARM_EQ;
13329 case GE: return ARM_LE;
13330 case GT: return ARM_LT;
13331 case LE: return ARM_GE;
13332 case LT: return ARM_GT;
13333 case GEU: return ARM_LS;
13334 case GTU: return ARM_CC;
13335 case LEU: return ARM_CS;
13336 case LTU: return ARM_HI;
13337 default: gcc_unreachable ();
13340 case CC_Cmode:
13341 switch (comp_code)
13343 case LTU: return ARM_CS;
13344 case GEU: return ARM_CC;
13345 default: gcc_unreachable ();
13348 case CCmode:
13349 switch (comp_code)
13351 case NE: return ARM_NE;
13352 case EQ: return ARM_EQ;
13353 case GE: return ARM_GE;
13354 case GT: return ARM_GT;
13355 case LE: return ARM_LE;
13356 case LT: return ARM_LT;
13357 case GEU: return ARM_CS;
13358 case GTU: return ARM_HI;
13359 case LEU: return ARM_LS;
13360 case LTU: return ARM_CC;
13361 default: gcc_unreachable ();
13364 default: gcc_unreachable ();
13368 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13369 instructions. */
13370 void
13371 thumb2_final_prescan_insn (rtx insn)
13373 rtx first_insn = insn;
13374 rtx body = PATTERN (insn);
13375 rtx predicate;
13376 enum arm_cond_code code;
13377 int n;
13378 int mask;
13380 /* Remove the previous insn from the count of insns to be output. */
13381 if (arm_condexec_count)
13382 arm_condexec_count--;
13384 /* Nothing to do if we are already inside a conditional block. */
13385 if (arm_condexec_count)
13386 return;
13388 if (GET_CODE (body) != COND_EXEC)
13389 return;
13391 /* Conditional jumps are implemented directly. */
13392 if (GET_CODE (insn) == JUMP_INSN)
13393 return;
13395 predicate = COND_EXEC_TEST (body);
13396 arm_current_cc = get_arm_condition_code (predicate);
13398 n = get_attr_ce_count (insn);
13399 arm_condexec_count = 1;
13400 arm_condexec_mask = (1 << n) - 1;
13401 arm_condexec_masklen = n;
13402 /* See if subsequent instructions can be combined into the same block. */
13403 for (;;)
13405 insn = next_nonnote_insn (insn);
13407 /* Jumping into the middle of an IT block is illegal, so a label or
13408 barrier terminates the block. */
13409 if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
13410 break;
13412 body = PATTERN (insn);
13413 /* USE and CLOBBER aren't really insns, so just skip them. */
13414 if (GET_CODE (body) == USE
13415 || GET_CODE (body) == CLOBBER)
13416 continue;
13418 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13419 if (GET_CODE (body) != COND_EXEC)
13420 break;
13421 /* Allow up to 4 conditionally executed instructions in a block. */
13422 n = get_attr_ce_count (insn);
13423 if (arm_condexec_masklen + n > 4)
13424 break;
13426 predicate = COND_EXEC_TEST (body);
13427 code = get_arm_condition_code (predicate);
13428 mask = (1 << n) - 1;
13429 if (arm_current_cc == code)
13430 arm_condexec_mask |= (mask << arm_condexec_masklen);
13431 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
13432 break;
13434 arm_condexec_count++;
13435 arm_condexec_masklen += n;
13437 /* A jump must be the last instruction in a conditional block. */
13438 if (GET_CODE (insn) == JUMP_INSN)
13439 break;
13441 /* Restore recog_data (getting the attributes of other insns can
13442 destroy this array, but final.c assumes that it remains intact
13443 across this call).  */
13444 extract_constrain_insn_cached (first_insn);
13447 void
13448 arm_final_prescan_insn (rtx insn)
13450 /* BODY will hold the body of INSN. */
13451 rtx body = PATTERN (insn);
13453 /* This will be 1 if trying to repeat the trick, and things need to be
13454 reversed if it appears to fail. */
13455 int reverse = 0;
13457 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when
13458 a branch is taken, even if the rtl suggests otherwise. It also
13459 means that we have to grub around within the jump expression to find
13460 out what the conditions are when the jump isn't taken. */
13461 int jump_clobbers = 0;
13463 /* If we start with a return insn, we only succeed if we find another one. */
13464 int seeking_return = 0;
13466 /* START_INSN will hold the insn from where we start looking. This is the
13467 first insn after the following code_label if REVERSE is true. */
13468 rtx start_insn = insn;
13470 /* If in state 4, check if the target branch is reached, in order to
13471 change back to state 0. */
13472 if (arm_ccfsm_state == 4)
13474 if (insn == arm_target_insn)
13476 arm_target_insn = NULL;
13477 arm_ccfsm_state = 0;
13479 return;
13482 /* If in state 3, it is possible to repeat the trick, if this insn is an
13483 unconditional branch to a label, and immediately following this branch
13484 is the previous target label which is only used once, and the label this
13485 branch jumps to is not too far off. */
13486 if (arm_ccfsm_state == 3)
13488 if (simplejump_p (insn))
13490 start_insn = next_nonnote_insn (start_insn);
13491 if (GET_CODE (start_insn) == BARRIER)
13493 /* XXX Isn't this always a barrier? */
13494 start_insn = next_nonnote_insn (start_insn);
13496 if (GET_CODE (start_insn) == CODE_LABEL
13497 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13498 && LABEL_NUSES (start_insn) == 1)
13499 reverse = TRUE;
13500 else
13501 return;
13503 else if (GET_CODE (body) == RETURN)
13505 start_insn = next_nonnote_insn (start_insn);
13506 if (GET_CODE (start_insn) == BARRIER)
13507 start_insn = next_nonnote_insn (start_insn);
13508 if (GET_CODE (start_insn) == CODE_LABEL
13509 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13510 && LABEL_NUSES (start_insn) == 1)
13512 reverse = TRUE;
13513 seeking_return = 1;
13515 else
13516 return;
13518 else
13519 return;
13522 gcc_assert (!arm_ccfsm_state || reverse);
13523 if (GET_CODE (insn) != JUMP_INSN)
13524 return;
13526 /* This jump might be paralleled with a clobber of the condition
13527 codes; the jump should always come first.  */
13528 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13529 body = XVECEXP (body, 0, 0);
13531 if (reverse
13532 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13533 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13535 int insns_skipped;
13536 int fail = FALSE, succeed = FALSE;
13537 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13538 int then_not_else = TRUE;
13539 rtx this_insn = start_insn, label = 0;
13541 /* If the jump cannot be done with one instruction, we cannot
13542 conditionally execute the instruction in the inverse case. */
13543 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13545 jump_clobbers = 1;
13546 return;
13549 /* Register the insn jumped to. */
13550 if (reverse)
13552 if (!seeking_return)
13553 label = XEXP (SET_SRC (body), 0);
13555 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13556 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13557 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13559 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13560 then_not_else = FALSE;
13562 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13563 seeking_return = 1;
13564 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13566 seeking_return = 1;
13567 then_not_else = FALSE;
13569 else
13570 gcc_unreachable ();
13572 /* See how many insns this branch skips, and what kind of insns. If all
13573 insns are okay, and the label or unconditional branch to the same
13574 label is not too far away, succeed. */
13575 for (insns_skipped = 0;
13576 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13578 rtx scanbody;
13580 this_insn = next_nonnote_insn (this_insn);
13581 if (!this_insn)
13582 break;
13584 switch (GET_CODE (this_insn))
13586 case CODE_LABEL:
13587 /* Succeed if it is the target label, otherwise fail since
13588 control falls in from somewhere else. */
13589 if (this_insn == label)
13591 if (jump_clobbers)
13593 arm_ccfsm_state = 2;
13594 this_insn = next_nonnote_insn (this_insn);
13596 else
13597 arm_ccfsm_state = 1;
13598 succeed = TRUE;
13600 else
13601 fail = TRUE;
13602 break;
13604 case BARRIER:
13605 /* Succeed if the following insn is the target label.
13606 Otherwise fail.
13607 If return insns are used then the last insn in a function
13608 will be a barrier. */
13609 this_insn = next_nonnote_insn (this_insn);
13610 if (this_insn && this_insn == label)
13612 if (jump_clobbers)
13614 arm_ccfsm_state = 2;
13615 this_insn = next_nonnote_insn (this_insn);
13617 else
13618 arm_ccfsm_state = 1;
13619 succeed = TRUE;
13621 else
13622 fail = TRUE;
13623 break;
13625 case CALL_INSN:
13626 /* The AAPCS says that conditional calls should not be
13627 used since they make interworking inefficient (the
13628 linker can't transform BL<cond> into BLX). That's
13629 only a problem if the machine has BLX. */
13630 if (arm_arch5)
13632 fail = TRUE;
13633 break;
13636 /* Succeed if the following insn is the target label, or
13637 if the following two insns are a barrier and the
13638 target label. */
13639 this_insn = next_nonnote_insn (this_insn);
13640 if (this_insn && GET_CODE (this_insn) == BARRIER)
13641 this_insn = next_nonnote_insn (this_insn);
13643 if (this_insn && this_insn == label
13644 && insns_skipped < max_insns_skipped)
13646 if (jump_clobbers)
13648 arm_ccfsm_state = 2;
13649 this_insn = next_nonnote_insn (this_insn);
13651 else
13652 arm_ccfsm_state = 1;
13653 succeed = TRUE;
13655 else
13656 fail = TRUE;
13657 break;
13659 case JUMP_INSN:
13660 /* If this is an unconditional branch to the same label, succeed.
13661 If it is to another label, do nothing. If it is conditional,
13662 fail. */
13663 /* XXX Probably, the tests for SET and the PC are
13664 unnecessary. */
13666 scanbody = PATTERN (this_insn);
13667 if (GET_CODE (scanbody) == SET
13668 && GET_CODE (SET_DEST (scanbody)) == PC)
13670 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13671 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13673 arm_ccfsm_state = 2;
13674 succeed = TRUE;
13676 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13677 fail = TRUE;
13679 /* Fail if a conditional return is undesirable (e.g. on a
13680 StrongARM), but still allow this if optimizing for size. */
13681 else if (GET_CODE (scanbody) == RETURN
13682 && !use_return_insn (TRUE, NULL)
13683 && !optimize_size)
13684 fail = TRUE;
13685 else if (GET_CODE (scanbody) == RETURN
13686 && seeking_return)
13688 arm_ccfsm_state = 2;
13689 succeed = TRUE;
13691 else if (GET_CODE (scanbody) == PARALLEL)
13693 switch (get_attr_conds (this_insn))
13695 case CONDS_NOCOND:
13696 break;
13697 default:
13698 fail = TRUE;
13699 break;
13702 else
13703 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13705 break;
13707 case INSN:
13708 /* Instructions using or affecting the condition codes make it
13709 fail. */
13710 scanbody = PATTERN (this_insn);
13711 if (!(GET_CODE (scanbody) == SET
13712 || GET_CODE (scanbody) == PARALLEL)
13713 || get_attr_conds (this_insn) != CONDS_NOCOND)
13714 fail = TRUE;
13716 /* A conditional cirrus instruction must be followed by
13717 a non Cirrus instruction. However, since we
13718 conditionalize instructions in this function and by
13719 the time we get here we can't add instructions
13720 (nops), because shorten_branches() has already been
13721 called, we will disable conditionalizing Cirrus
13722 instructions to be safe. */
13723 if (GET_CODE (scanbody) != USE
13724 && GET_CODE (scanbody) != CLOBBER
13725 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13726 fail = TRUE;
13727 break;
13729 default:
13730 break;
13733 if (succeed)
13735 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13736 arm_target_label = CODE_LABEL_NUMBER (label);
13737 else
13739 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13741 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13743 this_insn = next_nonnote_insn (this_insn);
13744 gcc_assert (!this_insn
13745 || (GET_CODE (this_insn) != BARRIER
13746 && GET_CODE (this_insn) != CODE_LABEL));
13748 if (!this_insn)
13750 /* Oh dear! We ran off the end... give up. */
13751 extract_constrain_insn_cached (insn);
13752 arm_ccfsm_state = 0;
13753 arm_target_insn = NULL;
13754 return;
13756 arm_target_insn = this_insn;
13758 if (jump_clobbers)
13760 gcc_assert (!reverse);
13761 arm_current_cc =
13762 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13763 0), 0), 1));
13764 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13765 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13766 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13767 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13769 else
13771 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13772 what it was. */
13773 if (!reverse)
13774 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13775 0));
13778 if (reverse || then_not_else)
13779 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13782 /* Restore recog_data (getting the attributes of other insns can
13783 destroy this array, but final.c assumes that it remains intact
13784 across this call).  */
13785 extract_constrain_insn_cached (insn);
13789 /* Output IT instructions. */
13790 void
13791 thumb2_asm_output_opcode (FILE * stream)
13793 char buff[5];
13794 int n;
13796 if (arm_condexec_mask)
13798 for (n = 0; n < arm_condexec_masklen; n++)
13799 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13800 buff[n] = 0;
13801 asm_fprintf (stream, "i%s\t%s\n\t", buff,
13802 arm_condition_codes[arm_current_cc]);
13803 arm_condexec_mask = 0;
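/* For example (illustrative): three conditional instructions, the first
   two executed for "eq" and the third for the inverse, give
   arm_condexec_mask == 0x3 and arm_condexec_masklen == 3, so buff is
   "tte" and the output is "itte eq"; the leading 't' for the first
   instruction of an IT block is always set.  */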
13807 /* Returns true if REGNO is a valid register
13808 for holding a quantity of type MODE. */
13809 int
13810 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13812 if (GET_MODE_CLASS (mode) == MODE_CC)
13813 return (regno == CC_REGNUM
13814 || (TARGET_HARD_FLOAT && TARGET_VFP
13815 && regno == VFPCC_REGNUM));
13817 if (TARGET_THUMB1)
13818 /* For the Thumb we only allow values bigger than SImode in
13819 registers 0 - 6, so that there is always a second low
13820 register available to hold the upper part of the value.
13821 We probably ought to ensure that the register is the
13822 start of an even numbered register pair. */
13823 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13825 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13826 && IS_CIRRUS_REGNUM (regno))
13827 /* We have outlawed SI values in Cirrus registers because they
13828 reside in the lower 32 bits, but SF values reside in the
13829 upper 32 bits. This causes gcc all sorts of grief. We can't
13830 even split the registers into pairs because Cirrus SI values
13831 get sign extended to 64 bits -- aldyh.  */
13832 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13834 if (TARGET_HARD_FLOAT && TARGET_VFP
13835 && IS_VFP_REGNUM (regno))
13837 if (mode == SFmode || mode == SImode)
13838 return VFP_REGNO_OK_FOR_SINGLE (regno);
13840 if (mode == DFmode)
13841 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13843 if (TARGET_NEON)
13844 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13845 || (VALID_NEON_QREG_MODE (mode)
13846 && NEON_REGNO_OK_FOR_QUAD (regno))
13847 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13848 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13849 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13850 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13851 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13853 return FALSE;
13856 if (TARGET_REALLY_IWMMXT)
13858 if (IS_IWMMXT_GR_REGNUM (regno))
13859 return mode == SImode;
13861 if (IS_IWMMXT_REGNUM (regno))
13862 return VALID_IWMMXT_REG_MODE (mode);
13865 /* We allow any value to be stored in the general registers.
13866 Restrict doubleword quantities to even register pairs so that we can
13867 use ldrd. Do not allow Neon structure opaque modes in general registers;
13868 they would use too many. */
13869 if (regno <= LAST_ARM_REGNUM)
13870 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
13871 && !VALID_NEON_STRUCT_MODE (mode);
13873 if (regno == FRAME_POINTER_REGNUM
13874 || regno == ARG_POINTER_REGNUM)
13875 /* We only allow integers in the fake hard registers. */
13876 return GET_MODE_CLASS (mode) == MODE_INT;
13878 /* The only registers left are the FPA registers
13879 which we only allow to hold FP values. */
13880 return (TARGET_HARD_FLOAT && TARGET_FPA
13881 && GET_MODE_CLASS (mode) == MODE_FLOAT
13882 && regno >= FIRST_FPA_REGNUM
13883 && regno <= LAST_FPA_REGNUM);
13886 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
13887 not used in arm mode. */
13888 enum reg_class
13889 arm_regno_class (int regno)
13891 if (TARGET_THUMB1)
13893 if (regno == STACK_POINTER_REGNUM)
13894 return STACK_REG;
13895 if (regno == CC_REGNUM)
13896 return CC_REG;
13897 if (regno < 8)
13898 return LO_REGS;
13899 return HI_REGS;
13902 if (TARGET_THUMB2 && regno < 8)
13903 return LO_REGS;
13905 if ( regno <= LAST_ARM_REGNUM
13906 || regno == FRAME_POINTER_REGNUM
13907 || regno == ARG_POINTER_REGNUM)
13908 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
13910 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
13911 return TARGET_THUMB2 ? CC_REG : NO_REGS;
13913 if (IS_CIRRUS_REGNUM (regno))
13914 return CIRRUS_REGS;
13916 if (IS_VFP_REGNUM (regno))
13918 if (regno <= D7_VFP_REGNUM)
13919 return VFP_D0_D7_REGS;
13920 else if (regno <= LAST_LO_VFP_REGNUM)
13921 return VFP_LO_REGS;
13922 else
13923 return VFP_HI_REGS;
13926 if (IS_IWMMXT_REGNUM (regno))
13927 return IWMMXT_REGS;
13929 if (IS_IWMMXT_GR_REGNUM (regno))
13930 return IWMMXT_GR_REGS;
13932 return FPA_REGS;
13935 /* Handle a special case when computing the offset
13936 of an argument from the frame pointer. */
13937 int
13938 arm_debugger_arg_offset (int value, rtx addr)
13940 rtx insn;
13942 /* We are only interested if dbxout_parms() failed to compute the offset. */
13943 if (value != 0)
13944 return 0;
13946 /* We can only cope with the case where the address is held in a register. */
13947 if (GET_CODE (addr) != REG)
13948 return 0;
13950 /* If we are using the frame pointer to point at the argument, then
13951 an offset of 0 is correct. */
13952 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
13953 return 0;
13955 /* If we are using the stack pointer to point at the
13956 argument, then an offset of 0 is correct. */
13957 /* ??? Check this is consistent with thumb2 frame layout. */
13958 if ((TARGET_THUMB || !frame_pointer_needed)
13959 && REGNO (addr) == SP_REGNUM)
13960 return 0;
13962 /* Oh dear. The argument is pointed to by a register rather
13963 than being held in a register, or being stored at a known
13964 offset from the frame pointer. Since GDB only understands
13965 those two kinds of argument we must translate the address
13966 held in the register into an offset from the frame pointer.
13967 We do this by searching through the insns for the function
13968 looking to see where this register gets its value. If the
13969 register is initialized from the frame pointer plus an offset
13970 then we are in luck and we can continue, otherwise we give up.
13972 This code is exercised by producing debugging information
13973 for a function with arguments like this:
13975 double func (double a, double b, int c, double d) {return d;}
13977 Without this code the stab for parameter 'd' will be set to
13978 an offset of 0 from the frame pointer, rather than 8. */
13980 /* The if() statement says:
13982 If the insn is a normal instruction
13983 and if the insn is setting the value in a register
13984 and if the register being set is the register holding the address of the argument
13985 and if the address is computed by an addition
13986 that involves adding to a register
13987 which is the frame pointer
13988 a constant integer
13990 then... */
13992 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13994 if ( GET_CODE (insn) == INSN
13995 && GET_CODE (PATTERN (insn)) == SET
13996 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
13997 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
13998 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
13999 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
14000 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14003 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14005 break;
14009 if (value == 0)
14011 debug_rtx (addr);
14012 warning (0, "unable to compute real location of stacked parameter");
14013 value = 8; /* XXX magic hack */
14016 return value;
14019 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14020 do \
14022 if ((MASK) & insn_flags) \
14023 add_builtin_function ((NAME), (TYPE), (CODE), \
14024 BUILT_IN_MD, NULL, NULL_TREE); \
14026 while (0)
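/* For instance, the call

     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);

   made further down registers __builtin_arm_wzero only when
   FL_IWMMXT is present in insn_flags; on other targets the builtin
   is never created at all.  */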
14028 struct builtin_description
14030 const unsigned int mask;
14031 const enum insn_code icode;
14032 const char * const name;
14033 const enum arm_builtins code;
14034 const enum rtx_code comparison;
14035 const unsigned int flag;
14038 static const struct builtin_description bdesc_2arg[] =
14040 #define IWMMXT_BUILTIN(code, string, builtin) \
14041 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14042 ARM_BUILTIN_##builtin, 0, 0 },
14044 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14045 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14046 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14047 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14048 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14049 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14050 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14051 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14052 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14053 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14054 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14055 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14056 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14057 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14058 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14059 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14060 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14061 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14062 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14063 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14064 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14065 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14066 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14067 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14068 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14069 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14070 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14071 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14072 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14073 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14074 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14075 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14076 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14077 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14078 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14079 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14080 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14081 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14082 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14083 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14084 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14085 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14086 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14087 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14088 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14089 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14090 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14091 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14092 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14093 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14094 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14095 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14096 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14097 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14098 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14099 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14100 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14101 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14103 #define IWMMXT_BUILTIN2(code, builtin) \
14104 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14106 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14107 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14108 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14109 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14110 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14111 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14112 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14113 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14114 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14115 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14116 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14117 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14118 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14119 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14120 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14121 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14122 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14123 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14124 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14125 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14126 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14127 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14128 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14129 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14130 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14131 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14132 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14133 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14134 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14135 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14136 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14137 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14140 static const struct builtin_description bdesc_1arg[] =
14142 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14143 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14144 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14145 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14146 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14147 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14148 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14149 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14150 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14151 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14152 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14153 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14154 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14155 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14156 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14157 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14158 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14159 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
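/* Each IWMMXT_BUILTIN line above is shorthand for a full
   builtin_description record; e.g. the "waddb" entry in bdesc_2arg
   expands to

     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, 0, 0 }

   and arm_init_iwmmxt_builtins below derives its function type
   (v8qi_ftype_v8qi_v8qi) from the V8QImode of the insn's first input
   operand.  */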
14162 /* Set up all the iWMMXt builtins. This is
14163 not called if TARGET_IWMMXT is zero. */
14165 static void
14166 arm_init_iwmmxt_builtins (void)
14168 const struct builtin_description * d;
14169 size_t i;
14170 tree endlink = void_list_node;
14172 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14173 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14174 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14176 tree int_ftype_int
14177 = build_function_type (integer_type_node,
14178 tree_cons (NULL_TREE, integer_type_node, endlink));
14179 tree v8qi_ftype_v8qi_v8qi_int
14180 = build_function_type (V8QI_type_node,
14181 tree_cons (NULL_TREE, V8QI_type_node,
14182 tree_cons (NULL_TREE, V8QI_type_node,
14183 tree_cons (NULL_TREE,
14184 integer_type_node,
14185 endlink))));
14186 tree v4hi_ftype_v4hi_int
14187 = build_function_type (V4HI_type_node,
14188 tree_cons (NULL_TREE, V4HI_type_node,
14189 tree_cons (NULL_TREE, integer_type_node,
14190 endlink)));
14191 tree v2si_ftype_v2si_int
14192 = build_function_type (V2SI_type_node,
14193 tree_cons (NULL_TREE, V2SI_type_node,
14194 tree_cons (NULL_TREE, integer_type_node,
14195 endlink)));
14196 tree v2si_ftype_di_di
14197 = build_function_type (V2SI_type_node,
14198 tree_cons (NULL_TREE, long_long_integer_type_node,
14199 tree_cons (NULL_TREE, long_long_integer_type_node,
14200 endlink)));
14201 tree di_ftype_di_int
14202 = build_function_type (long_long_integer_type_node,
14203 tree_cons (NULL_TREE, long_long_integer_type_node,
14204 tree_cons (NULL_TREE, integer_type_node,
14205 endlink)));
14206 tree di_ftype_di_int_int
14207 = build_function_type (long_long_integer_type_node,
14208 tree_cons (NULL_TREE, long_long_integer_type_node,
14209 tree_cons (NULL_TREE, integer_type_node,
14210 tree_cons (NULL_TREE,
14211 integer_type_node,
14212 endlink))));
14213 tree int_ftype_v8qi
14214 = build_function_type (integer_type_node,
14215 tree_cons (NULL_TREE, V8QI_type_node,
14216 endlink));
14217 tree int_ftype_v4hi
14218 = build_function_type (integer_type_node,
14219 tree_cons (NULL_TREE, V4HI_type_node,
14220 endlink));
14221 tree int_ftype_v2si
14222 = build_function_type (integer_type_node,
14223 tree_cons (NULL_TREE, V2SI_type_node,
14224 endlink));
14225 tree int_ftype_v8qi_int
14226 = build_function_type (integer_type_node,
14227 tree_cons (NULL_TREE, V8QI_type_node,
14228 tree_cons (NULL_TREE, integer_type_node,
14229 endlink)));
14230 tree int_ftype_v4hi_int
14231 = build_function_type (integer_type_node,
14232 tree_cons (NULL_TREE, V4HI_type_node,
14233 tree_cons (NULL_TREE, integer_type_node,
14234 endlink)));
14235 tree int_ftype_v2si_int
14236 = build_function_type (integer_type_node,
14237 tree_cons (NULL_TREE, V2SI_type_node,
14238 tree_cons (NULL_TREE, integer_type_node,
14239 endlink)));
14240 tree v8qi_ftype_v8qi_int_int
14241 = build_function_type (V8QI_type_node,
14242 tree_cons (NULL_TREE, V8QI_type_node,
14243 tree_cons (NULL_TREE, integer_type_node,
14244 tree_cons (NULL_TREE,
14245 integer_type_node,
14246 endlink))));
14247 tree v4hi_ftype_v4hi_int_int
14248 = build_function_type (V4HI_type_node,
14249 tree_cons (NULL_TREE, V4HI_type_node,
14250 tree_cons (NULL_TREE, integer_type_node,
14251 tree_cons (NULL_TREE,
14252 integer_type_node,
14253 endlink))));
14254 tree v2si_ftype_v2si_int_int
14255 = build_function_type (V2SI_type_node,
14256 tree_cons (NULL_TREE, V2SI_type_node,
14257 tree_cons (NULL_TREE, integer_type_node,
14258 tree_cons (NULL_TREE,
14259 integer_type_node,
14260 endlink))));
14261 /* Miscellaneous. */
14262 tree v8qi_ftype_v4hi_v4hi
14263 = build_function_type (V8QI_type_node,
14264 tree_cons (NULL_TREE, V4HI_type_node,
14265 tree_cons (NULL_TREE, V4HI_type_node,
14266 endlink)));
14267 tree v4hi_ftype_v2si_v2si
14268 = build_function_type (V4HI_type_node,
14269 tree_cons (NULL_TREE, V2SI_type_node,
14270 tree_cons (NULL_TREE, V2SI_type_node,
14271 endlink)));
14272 tree v2si_ftype_v4hi_v4hi
14273 = build_function_type (V2SI_type_node,
14274 tree_cons (NULL_TREE, V4HI_type_node,
14275 tree_cons (NULL_TREE, V4HI_type_node,
14276 endlink)));
14277 tree v2si_ftype_v8qi_v8qi
14278 = build_function_type (V2SI_type_node,
14279 tree_cons (NULL_TREE, V8QI_type_node,
14280 tree_cons (NULL_TREE, V8QI_type_node,
14281 endlink)));
14282 tree v4hi_ftype_v4hi_di
14283 = build_function_type (V4HI_type_node,
14284 tree_cons (NULL_TREE, V4HI_type_node,
14285 tree_cons (NULL_TREE,
14286 long_long_integer_type_node,
14287 endlink)));
14288 tree v2si_ftype_v2si_di
14289 = build_function_type (V2SI_type_node,
14290 tree_cons (NULL_TREE, V2SI_type_node,
14291 tree_cons (NULL_TREE,
14292 long_long_integer_type_node,
14293 endlink)));
14294 tree void_ftype_int_int
14295 = build_function_type (void_type_node,
14296 tree_cons (NULL_TREE, integer_type_node,
14297 tree_cons (NULL_TREE, integer_type_node,
14298 endlink)));
14299 tree di_ftype_void
14300 = build_function_type (long_long_unsigned_type_node, endlink);
14301 tree di_ftype_v8qi
14302 = build_function_type (long_long_integer_type_node,
14303 tree_cons (NULL_TREE, V8QI_type_node,
14304 endlink));
14305 tree di_ftype_v4hi
14306 = build_function_type (long_long_integer_type_node,
14307 tree_cons (NULL_TREE, V4HI_type_node,
14308 endlink));
14309 tree di_ftype_v2si
14310 = build_function_type (long_long_integer_type_node,
14311 tree_cons (NULL_TREE, V2SI_type_node,
14312 endlink));
14313 tree v2si_ftype_v4hi
14314 = build_function_type (V2SI_type_node,
14315 tree_cons (NULL_TREE, V4HI_type_node,
14316 endlink));
14317 tree v4hi_ftype_v8qi
14318 = build_function_type (V4HI_type_node,
14319 tree_cons (NULL_TREE, V8QI_type_node,
14320 endlink));
14322 tree di_ftype_di_v4hi_v4hi
14323 = build_function_type (long_long_unsigned_type_node,
14324 tree_cons (NULL_TREE,
14325 long_long_unsigned_type_node,
14326 tree_cons (NULL_TREE, V4HI_type_node,
14327 tree_cons (NULL_TREE,
14328 V4HI_type_node,
14329 endlink))));
14331 tree di_ftype_v4hi_v4hi
14332 = build_function_type (long_long_unsigned_type_node,
14333 tree_cons (NULL_TREE, V4HI_type_node,
14334 tree_cons (NULL_TREE, V4HI_type_node,
14335 endlink)));
14337 /* Normal vector binops. */
14338 tree v8qi_ftype_v8qi_v8qi
14339 = build_function_type (V8QI_type_node,
14340 tree_cons (NULL_TREE, V8QI_type_node,
14341 tree_cons (NULL_TREE, V8QI_type_node,
14342 endlink)));
14343 tree v4hi_ftype_v4hi_v4hi
14344 = build_function_type (V4HI_type_node,
14345 tree_cons (NULL_TREE, V4HI_type_node,
14346 tree_cons (NULL_TREE, V4HI_type_node,
14347 endlink)));
14348 tree v2si_ftype_v2si_v2si
14349 = build_function_type (V2SI_type_node,
14350 tree_cons (NULL_TREE, V2SI_type_node,
14351 tree_cons (NULL_TREE, V2SI_type_node,
14352 endlink)));
14353 tree di_ftype_di_di
14354 = build_function_type (long_long_unsigned_type_node,
14355 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14356 tree_cons (NULL_TREE,
14357 long_long_unsigned_type_node,
14358 endlink)));
14360 /* Add all builtins that are more or less simple operations on two
14361 operands. */
14362 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14364 /* Use one of the operands; the target can have a different mode for
14365 mask-generating compares. */
14366 enum machine_mode mode;
14367 tree type;
14369 if (d->name == 0)
14370 continue;
14372 mode = insn_data[d->icode].operand[1].mode;
14374 switch (mode)
14376 case V8QImode:
14377 type = v8qi_ftype_v8qi_v8qi;
14378 break;
14379 case V4HImode:
14380 type = v4hi_ftype_v4hi_v4hi;
14381 break;
14382 case V2SImode:
14383 type = v2si_ftype_v2si_v2si;
14384 break;
14385 case DImode:
14386 type = di_ftype_di_di;
14387 break;
14389 default:
14390 gcc_unreachable ();
14393 def_mbuiltin (d->mask, d->name, type, d->code);
14396 /* Add the remaining MMX insns with somewhat more complicated types. */
14397 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14398 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14399 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14401 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14405 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14408 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14410 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14412 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14415 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14419 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14424 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14426 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14433 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14443 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14447 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14451 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14458 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14471 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14476 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14484 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14485 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14488 static void
14489 arm_init_tls_builtins (void)
14491 tree ftype, decl;
14493 ftype = build_function_type (ptr_type_node, void_list_node);
14494 decl = add_builtin_function ("__builtin_thread_pointer", ftype,
14495 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14496 NULL, NULL_TREE);
14497 TREE_NOTHROW (decl) = 1;
14498 TREE_READONLY (decl) = 1;
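/* Once registered, user code can simply write

     void *tp = __builtin_thread_pointer ();

   and the TREE_NOTHROW / TREE_READONLY bits set above let the
   optimizers treat the call as a non-trapping read with no side
   effects.  */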
14501 typedef enum {
14502 T_V8QI = 0x0001,
14503 T_V4HI = 0x0002,
14504 T_V2SI = 0x0004,
14505 T_V2SF = 0x0008,
14506 T_DI = 0x0010,
14507 T_V16QI = 0x0020,
14508 T_V8HI = 0x0040,
14509 T_V4SI = 0x0080,
14510 T_V4SF = 0x0100,
14511 T_V2DI = 0x0200,
14512 T_TI = 0x0400,
14513 T_EI = 0x0800,
14514 T_OI = 0x1000
14515 } neon_builtin_type_bits;
14517 #define v8qi_UP T_V8QI
14518 #define v4hi_UP T_V4HI
14519 #define v2si_UP T_V2SI
14520 #define v2sf_UP T_V2SF
14521 #define di_UP T_DI
14522 #define v16qi_UP T_V16QI
14523 #define v8hi_UP T_V8HI
14524 #define v4si_UP T_V4SI
14525 #define v4sf_UP T_V4SF
14526 #define v2di_UP T_V2DI
14527 #define ti_UP T_TI
14528 #define ei_UP T_EI
14529 #define oi_UP T_OI
14531 #define UP(X) X##_UP
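/* Thus UP (v8qi) expands to T_V8QI, and the VARn macros below simply
   OR these bits together: a variant list (v4hi, v2si) becomes the
   mask T_V4HI | T_V2SI, with one entry in the codes[] array per bit
   set.  */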
14533 #define T_MAX 13
14535 typedef enum {
14536 NEON_BINOP,
14537 NEON_TERNOP,
14538 NEON_UNOP,
14539 NEON_GETLANE,
14540 NEON_SETLANE,
14541 NEON_CREATE,
14542 NEON_DUP,
14543 NEON_DUPLANE,
14544 NEON_COMBINE,
14545 NEON_SPLIT,
14546 NEON_LANEMUL,
14547 NEON_LANEMULL,
14548 NEON_LANEMULH,
14549 NEON_LANEMAC,
14550 NEON_SCALARMUL,
14551 NEON_SCALARMULL,
14552 NEON_SCALARMULH,
14553 NEON_SCALARMAC,
14554 NEON_CONVERT,
14555 NEON_FIXCONV,
14556 NEON_SELECT,
14557 NEON_RESULTPAIR,
14558 NEON_REINTERP,
14559 NEON_VTBL,
14560 NEON_VTBX,
14561 NEON_LOAD1,
14562 NEON_LOAD1LANE,
14563 NEON_STORE1,
14564 NEON_STORE1LANE,
14565 NEON_LOADSTRUCT,
14566 NEON_LOADSTRUCTLANE,
14567 NEON_STORESTRUCT,
14568 NEON_STORESTRUCTLANE,
14569 NEON_LOGICBINOP,
14570 NEON_SHIFTINSERT,
14571 NEON_SHIFTIMM,
14572 NEON_SHIFTACC
14573 } neon_itype;
14575 typedef struct {
14576 const char *name;
14577 const neon_itype itype;
14578 const neon_builtin_type_bits bits;
14579 const enum insn_code codes[T_MAX];
14580 const unsigned int num_vars;
14581 unsigned int base_fcode;
14582 } neon_builtin_datum;
14584 #define CF(N,X) CODE_FOR_neon_##N##X
14586 #define VAR1(T, N, A) \
14587 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14588 #define VAR2(T, N, A, B) \
14589 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14590 #define VAR3(T, N, A, B, C) \
14591 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14592 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14593 #define VAR4(T, N, A, B, C, D) \
14594 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14595 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14596 #define VAR5(T, N, A, B, C, D, E) \
14597 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14598 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14599 #define VAR6(T, N, A, B, C, D, E, F) \
14600 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14601 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14602 #define VAR7(T, N, A, B, C, D, E, F, G) \
14603 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14604 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14605 CF (N, G) }, 7, 0
14606 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14607 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14608 | UP (H), \
14609 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14610 CF (N, G), CF (N, H) }, 8, 0
14611 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14612 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14613 | UP (H) | UP (I), \
14614 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14615 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14616 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14617 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14618 | UP (H) | UP (I) | UP (J), \
14619 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14620 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
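/* As a concrete expansion, VAR2 (BINOP, vqdmull, v4hi, v2si) in the
   table below produces

     "vqdmull", NEON_BINOP, T_V4HI | T_V2SI,
     { CODE_FOR_neon_vqdmullv4hi, CODE_FOR_neon_vqdmullv2si }, 2, 0

   i.e. one neon_builtin_datum covering both element widths, whose
   base_fcode field is filled in later by arm_init_neon_builtins.  */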
14622 /* The mode entries in the following table correspond to the "key" type of the
14623 instruction variant, i.e. equivalent to that which would be specified after
14624 the assembler mnemonic, which usually refers to the last vector operand.
14625 (Signed, unsigned and polynomial types are not differentiated, though;
14626 they are all mapped onto the same mode for a given element size.) The modes
14627 listed per instruction should be the same as those defined for that
14628 instruction's pattern in neon.md.
14629 WARNING: Variants should be listed in the same increasing order as
14630 neon_builtin_type_bits. */
14632 static neon_builtin_datum neon_builtin_data[] =
14634 { VAR10 (BINOP, vadd,
14635 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14636 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14637 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14638 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14639 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14640 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14641 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14642 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14643 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14644 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14645 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14646 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14647 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14648 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14649 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14650 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14651 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14652 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14653 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14654 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14655 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14656 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14657 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14658 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14659 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14660 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14661 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14662 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14663 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14664 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14665 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14666 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14667 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14668 { VAR10 (BINOP, vsub,
14669 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14670 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14671 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14672 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14673 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14674 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14675 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14676 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14677 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14678 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14679 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14680 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14681 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14682 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14683 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14684 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14685 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14686 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14687 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14688 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14689 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14690 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14691 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14692 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14693 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14694 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14695 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14696 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14697 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14698 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14699 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14700 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14701 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14702 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14703 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14704 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14705 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14706 /* FIXME: vget_lane supports more variants than this! */
14707 { VAR10 (GETLANE, vget_lane,
14708 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14709 { VAR10 (SETLANE, vset_lane,
14710 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14711 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14712 { VAR10 (DUP, vdup_n,
14713 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14714 { VAR10 (DUPLANE, vdup_lane,
14715 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14716 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14717 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14718 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14719 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14720 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14721 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14722 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14723 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14724 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14725 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14726 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14727 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14728 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14729 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14730 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14731 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14732 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14733 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14734 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14735 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14736 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14737 { VAR10 (BINOP, vext,
14738 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14739 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14740 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14741 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14742 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14743 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14744 { VAR10 (SELECT, vbsl,
14745 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14746 { VAR1 (VTBL, vtbl1, v8qi) },
14747 { VAR1 (VTBL, vtbl2, v8qi) },
14748 { VAR1 (VTBL, vtbl3, v8qi) },
14749 { VAR1 (VTBL, vtbl4, v8qi) },
14750 { VAR1 (VTBX, vtbx1, v8qi) },
14751 { VAR1 (VTBX, vtbx2, v8qi) },
14752 { VAR1 (VTBX, vtbx3, v8qi) },
14753 { VAR1 (VTBX, vtbx4, v8qi) },
14754 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14755 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14756 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14757 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14758 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14759 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14760 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14761 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14762 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14763 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14764 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14765 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14766 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14767 { VAR10 (LOAD1, vld1,
14768 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14769 { VAR10 (LOAD1LANE, vld1_lane,
14770 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14771 { VAR10 (LOAD1, vld1_dup,
14772 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14773 { VAR10 (STORE1, vst1,
14774 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14775 { VAR10 (STORE1LANE, vst1_lane,
14776 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14777 { VAR9 (LOADSTRUCT,
14778 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14779 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14780 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14781 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14782 { VAR9 (STORESTRUCT, vst2,
14783 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14784 { VAR7 (STORESTRUCTLANE, vst2_lane,
14785 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14786 { VAR9 (LOADSTRUCT,
14787 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14788 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14789 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14790 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14791 { VAR9 (STORESTRUCT, vst3,
14792 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14793 { VAR7 (STORESTRUCTLANE, vst3_lane,
14794 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14795 { VAR9 (LOADSTRUCT, vld4,
14796 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14797 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14798 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14799 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14800 { VAR9 (STORESTRUCT, vst4,
14801 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14802 { VAR7 (STORESTRUCTLANE, vst4_lane,
14803 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14804 { VAR10 (LOGICBINOP, vand,
14805 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14806 { VAR10 (LOGICBINOP, vorr,
14807 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14808 { VAR10 (BINOP, veor,
14809 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14810 { VAR10 (LOGICBINOP, vbic,
14811 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14812 { VAR10 (LOGICBINOP, vorn,
14813 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14816 #undef CF
14817 #undef VAR1
14818 #undef VAR2
14819 #undef VAR3
14820 #undef VAR4
14821 #undef VAR5
14822 #undef VAR6
14823 #undef VAR7
14824 #undef VAR8
14825 #undef VAR9
14826 #undef VAR10
14828 static void
14829 arm_init_neon_builtins (void)
14831 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14833 /* Create distinguished type nodes for NEON vector element types,
14834 and pointers to values of such types, so we can detect them later. */
14835 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14836 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14837 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14838 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14839 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14840 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14841 tree neon_float_type_node = make_node (REAL_TYPE);
14843 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14844 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14845 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14846 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14847 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14849 /* Next create constant-qualified versions of the above types. */
14850 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14851 TYPE_QUAL_CONST);
14852 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14853 TYPE_QUAL_CONST);
14854 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14855 TYPE_QUAL_CONST);
14856 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14857 TYPE_QUAL_CONST);
14858 tree const_float_node = build_qualified_type (neon_float_type_node,
14859 TYPE_QUAL_CONST);
14861 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14862 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14863 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14864 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14865 tree const_float_pointer_node = build_pointer_type (const_float_node);
14867 /* Now create vector types based on our NEON element types. */
14868 /* 64-bit vectors. */
14869 tree V8QI_type_node =
14870 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
14871 tree V4HI_type_node =
14872 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
14873 tree V2SI_type_node =
14874 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
14875 tree V2SF_type_node =
14876 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
14877 /* 128-bit vectors. */
14878 tree V16QI_type_node =
14879 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
14880 tree V8HI_type_node =
14881 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
14882 tree V4SI_type_node =
14883 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
14884 tree V4SF_type_node =
14885 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
14886 tree V2DI_type_node =
14887 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
14889 /* Unsigned integer types for various mode sizes. */
14890 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
14891 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
14892 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
14893 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
14895 /* Opaque integer types for structures of vectors. */
14896 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
14897 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
14898 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
14899 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
14901 /* Pointers to vector types. */
14902 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
14903 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
14904 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
14905 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
14906 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
14907 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
14908 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
14909 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
14910 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
14912 /* Operations which return results as pairs. */
14913 tree void_ftype_pv8qi_v8qi_v8qi =
14914 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
14915 V8QI_type_node, NULL);
14916 tree void_ftype_pv4hi_v4hi_v4hi =
14917 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
14918 V4HI_type_node, NULL);
14919 tree void_ftype_pv2si_v2si_v2si =
14920 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
14921 V2SI_type_node, NULL);
14922 tree void_ftype_pv2sf_v2sf_v2sf =
14923 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
14924 V2SF_type_node, NULL);
14925 tree void_ftype_pdi_di_di =
14926 build_function_type_list (void_type_node, intDI_pointer_node,
14927 neon_intDI_type_node, neon_intDI_type_node, NULL);
14928 tree void_ftype_pv16qi_v16qi_v16qi =
14929 build_function_type_list (void_type_node, V16QI_pointer_node,
14930 V16QI_type_node, V16QI_type_node, NULL);
14931 tree void_ftype_pv8hi_v8hi_v8hi =
14932 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
14933 V8HI_type_node, NULL);
14934 tree void_ftype_pv4si_v4si_v4si =
14935 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
14936 V4SI_type_node, NULL);
14937 tree void_ftype_pv4sf_v4sf_v4sf =
14938 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
14939 V4SF_type_node, NULL);
14940 tree void_ftype_pv2di_v2di_v2di =
14941 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
14942 V2DI_type_node, NULL);
14944 tree reinterp_ftype_dreg[5][5];
14945 tree reinterp_ftype_qreg[5][5];
14946 tree dreg_types[5], qreg_types[5];
14948 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
14949 layout_type (neon_float_type_node);
14951 /* Define typedefs which exactly correspond to the modes we are basing vector
14952 types on. If you change these names you'll need to change
14953 the table used by arm_mangle_type too. */
14954 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
14955 "__builtin_neon_qi");
14956 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
14957 "__builtin_neon_hi");
14958 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
14959 "__builtin_neon_si");
14960 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
14961 "__builtin_neon_sf");
14962 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
14963 "__builtin_neon_di");
14965 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
14966 "__builtin_neon_poly8");
14967 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
14968 "__builtin_neon_poly16");
14969 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
14970 "__builtin_neon_uqi");
14971 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
14972 "__builtin_neon_uhi");
14973 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
14974 "__builtin_neon_usi");
14975 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
14976 "__builtin_neon_udi");
14978 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
14979 "__builtin_neon_ti");
14980 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
14981 "__builtin_neon_ei");
14982 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
14983 "__builtin_neon_oi");
14984 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
14985 "__builtin_neon_ci");
14986 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
14987 "__builtin_neon_xi");
14989 dreg_types[0] = V8QI_type_node;
14990 dreg_types[1] = V4HI_type_node;
14991 dreg_types[2] = V2SI_type_node;
14992 dreg_types[3] = V2SF_type_node;
14993 dreg_types[4] = neon_intDI_type_node;
14995 qreg_types[0] = V16QI_type_node;
14996 qreg_types[1] = V8HI_type_node;
14997 qreg_types[2] = V4SI_type_node;
14998 qreg_types[3] = V4SF_type_node;
14999 qreg_types[4] = V2DI_type_node;
15001 for (i = 0; i < 5; i++)
15003 int j;
15004 for (j = 0; j < 5; j++)
15006 reinterp_ftype_dreg[i][j]
15007 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15008 reinterp_ftype_qreg[i][j]
15009 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15013 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15015 neon_builtin_datum *d = &neon_builtin_data[i];
15016 unsigned int j, codeidx = 0;
15018 d->base_fcode = fcode;
15020 for (j = 0; j < T_MAX; j++)
15022 const char* const modenames[] = {
15023 "v8qi", "v4hi", "v2si", "v2sf", "di",
15024 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15026 char namebuf[60];
15027 tree ftype = NULL;
15028 enum insn_code icode;
15029 int is_load = 0, is_store = 0;
15031 if ((d->bits & (1 << j)) == 0)
15032 continue;
15034 icode = d->codes[codeidx++];
15036 switch (d->itype)
15038 case NEON_LOAD1:
15039 case NEON_LOAD1LANE:
15040 case NEON_LOADSTRUCT:
15041 case NEON_LOADSTRUCTLANE:
15042 is_load = 1;
15043 /* Fall through. */
15044 case NEON_STORE1:
15045 case NEON_STORE1LANE:
15046 case NEON_STORESTRUCT:
15047 case NEON_STORESTRUCTLANE:
15048 if (!is_load)
15049 is_store = 1;
15050 /* Fall through. */
15051 case NEON_UNOP:
15052 case NEON_BINOP:
15053 case NEON_LOGICBINOP:
15054 case NEON_SHIFTINSERT:
15055 case NEON_TERNOP:
15056 case NEON_GETLANE:
15057 case NEON_SETLANE:
15058 case NEON_CREATE:
15059 case NEON_DUP:
15060 case NEON_DUPLANE:
15061 case NEON_SHIFTIMM:
15062 case NEON_SHIFTACC:
15063 case NEON_COMBINE:
15064 case NEON_SPLIT:
15065 case NEON_CONVERT:
15066 case NEON_FIXCONV:
15067 case NEON_LANEMUL:
15068 case NEON_LANEMULL:
15069 case NEON_LANEMULH:
15070 case NEON_LANEMAC:
15071 case NEON_SCALARMUL:
15072 case NEON_SCALARMULL:
15073 case NEON_SCALARMULH:
15074 case NEON_SCALARMAC:
15075 case NEON_SELECT:
15076 case NEON_VTBL:
15077 case NEON_VTBX:
15079 int k;
15080 tree return_type = void_type_node, args = void_list_node;
15082 /* Build a function type directly from the insn_data for this
15083 builtin. The build_function_type() function takes care of
15084 removing duplicates for us. */
15085 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15087 tree eltype;
15089 if (is_load && k == 1)
15091 /* Neon load patterns always have the memory operand
15092 (a SImode pointer) in the operand 1 position. We
15093 want a const pointer to the element type in that
15094 position. */
15095 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15097 switch (1 << j)
15099 case T_V8QI:
15100 case T_V16QI:
15101 eltype = const_intQI_pointer_node;
15102 break;
15104 case T_V4HI:
15105 case T_V8HI:
15106 eltype = const_intHI_pointer_node;
15107 break;
15109 case T_V2SI:
15110 case T_V4SI:
15111 eltype = const_intSI_pointer_node;
15112 break;
15114 case T_V2SF:
15115 case T_V4SF:
15116 eltype = const_float_pointer_node;
15117 break;
15119 case T_DI:
15120 case T_V2DI:
15121 eltype = const_intDI_pointer_node;
15122 break;
15124 default: gcc_unreachable ();
15127 else if (is_store && k == 0)
15129 /* Similarly, Neon store patterns use operand 0 as
15130 the memory location to store to (a SImode pointer).
15131 Use a pointer to the element type of the store in
15132 that position. */
15133 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15135 switch (1 << j)
15137 case T_V8QI:
15138 case T_V16QI:
15139 eltype = intQI_pointer_node;
15140 break;
15142 case T_V4HI:
15143 case T_V8HI:
15144 eltype = intHI_pointer_node;
15145 break;
15147 case T_V2SI:
15148 case T_V4SI:
15149 eltype = intSI_pointer_node;
15150 break;
15152 case T_V2SF:
15153 case T_V4SF:
15154 eltype = float_pointer_node;
15155 break;
15157 case T_DI:
15158 case T_V2DI:
15159 eltype = intDI_pointer_node;
15160 break;
15162 default: gcc_unreachable ();
15165 else
15167 switch (insn_data[icode].operand[k].mode)
15169 case VOIDmode: eltype = void_type_node; break;
15170 /* Scalars. */
15171 case QImode: eltype = neon_intQI_type_node; break;
15172 case HImode: eltype = neon_intHI_type_node; break;
15173 case SImode: eltype = neon_intSI_type_node; break;
15174 case SFmode: eltype = neon_float_type_node; break;
15175 case DImode: eltype = neon_intDI_type_node; break;
15176 case TImode: eltype = intTI_type_node; break;
15177 case EImode: eltype = intEI_type_node; break;
15178 case OImode: eltype = intOI_type_node; break;
15179 case CImode: eltype = intCI_type_node; break;
15180 case XImode: eltype = intXI_type_node; break;
15181 /* 64-bit vectors. */
15182 case V8QImode: eltype = V8QI_type_node; break;
15183 case V4HImode: eltype = V4HI_type_node; break;
15184 case V2SImode: eltype = V2SI_type_node; break;
15185 case V2SFmode: eltype = V2SF_type_node; break;
15186 /* 128-bit vectors. */
15187 case V16QImode: eltype = V16QI_type_node; break;
15188 case V8HImode: eltype = V8HI_type_node; break;
15189 case V4SImode: eltype = V4SI_type_node; break;
15190 case V4SFmode: eltype = V4SF_type_node; break;
15191 case V2DImode: eltype = V2DI_type_node; break;
15192 default: gcc_unreachable ();
15196 if (k == 0 && !is_store)
15197 return_type = eltype;
15198 else
15199 args = tree_cons (NULL_TREE, eltype, args);
15202 ftype = build_function_type (return_type, args);
15204 break;
15206 case NEON_RESULTPAIR:
15208 switch (insn_data[icode].operand[1].mode)
15210 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15211 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15212 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15213 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15214 case DImode: ftype = void_ftype_pdi_di_di; break;
15215 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15216 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15217 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15218 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15219 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15220 default: gcc_unreachable ();
15223 break;
15225 case NEON_REINTERP:
15227 /* We iterate over 5 doubleword types, then 5 quadword
15228 types. */
15229 int rhs = j % 5;
15230 switch (insn_data[icode].operand[0].mode)
15232 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15233 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15234 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15235 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15236 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15237 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15238 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15239 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15240 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15241 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15242 default: gcc_unreachable ();
15245 break;
15247 default:
15248 gcc_unreachable ();
15251 gcc_assert (ftype != NULL);
15253 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15255 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15256 NULL_TREE);
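/* The sprintf above gives each variant its own name: the vadd entry,
   for example, yields __builtin_neon_vaddv8qi through
   __builtin_neon_vaddv2di, one builtin per bit set in the entry's
   type mask, with consecutive function codes starting at the entry's
   base_fcode.  */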
15261 static void
15262 arm_init_builtins (void)
15264 arm_init_tls_builtins ();
15266 if (TARGET_REALLY_IWMMXT)
15267 arm_init_iwmmxt_builtins ();
15269 if (TARGET_NEON)
15270 arm_init_neon_builtins ();
15273 /* Errors in the source file can cause expand_expr to return const0_rtx
15274 where we expect a vector. To avoid crashing, use one of the vector
15275 clear instructions. */
15277 static rtx
15278 safe_vector_operand (rtx x, enum machine_mode mode)
15280 if (x != const0_rtx)
15281 return x;
15282 x = gen_reg_rtx (mode);
15284 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15285 : gen_rtx_SUBREG (DImode, x, 0)));
15286 return x;
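/* E.g. an erroneous source file may hand us const0_rtx where a V8QI
   vector was expected; the substitution above hands back a fresh
   register cleared through the iWMMXt clear pattern so that
   expansion can continue instead of ICEing.  */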
15289 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15291 static rtx
15292 arm_expand_binop_builtin (enum insn_code icode,
15293 tree exp, rtx target)
15295 rtx pat;
15296 tree arg0 = CALL_EXPR_ARG (exp, 0);
15297 tree arg1 = CALL_EXPR_ARG (exp, 1);
15298 rtx op0 = expand_normal (arg0);
15299 rtx op1 = expand_normal (arg1);
15300 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15301 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15302 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15304 if (VECTOR_MODE_P (mode0))
15305 op0 = safe_vector_operand (op0, mode0);
15306 if (VECTOR_MODE_P (mode1))
15307 op1 = safe_vector_operand (op1, mode1);
15309 if (! target
15310 || GET_MODE (target) != tmode
15311 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15312 target = gen_reg_rtx (tmode);
15314 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15316 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15317 op0 = copy_to_mode_reg (mode0, op0);
15318 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15319 op1 = copy_to_mode_reg (mode1, op1);
15321 pat = GEN_FCN (icode) (target, op0, op1);
15322 if (! pat)
15323 return 0;
15324 emit_insn (pat);
15325 return target;
15328 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15330 static rtx
15331 arm_expand_unop_builtin (enum insn_code icode,
15332 tree exp, rtx target, int do_load)
15334 rtx pat;
15335 tree arg0 = CALL_EXPR_ARG (exp, 0);
15336 rtx op0 = expand_normal (arg0);
15337 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15338 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15340 if (! target
15341 || GET_MODE (target) != tmode
15342 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15343 target = gen_reg_rtx (tmode);
15344 if (do_load)
15345 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15346 else
15348 if (VECTOR_MODE_P (mode0))
15349 op0 = safe_vector_operand (op0, mode0);
15351 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15352 op0 = copy_to_mode_reg (mode0, op0);
15355 pat = GEN_FCN (icode) (target, op0);
15356 if (! pat)
15357 return 0;
15358 emit_insn (pat);
15359 return target;
15362 static int
15363 neon_builtin_compare (const void *a, const void *b)
15365 const neon_builtin_datum *key = a;
15366 const neon_builtin_datum *memb = b;
15367 unsigned int soughtcode = key->base_fcode;
15369 if (soughtcode >= memb->base_fcode
15370 && soughtcode < memb->base_fcode + memb->num_vars)
15371 return 0;
15372 else if (soughtcode < memb->base_fcode)
15373 return -1;
15374 else
15375 return 1;
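/* Editorial sketch (not part of this file): a minimal, self-contained
   illustration of the range-comparator idiom used by
   neon_builtin_compare above, with made-up types and names.  Each
   table entry covers the half-open range [base, base + count), and
   bsearch returns the entry whose range contains the key.  */

#include <stdlib.h>

struct code_range { unsigned int base; unsigned int count; };

static int
code_range_compare (const void *a, const void *b)
{
  const unsigned int *key = a;
  const struct code_range *memb = b;

  if (*key >= memb->base && *key < memb->base + memb->count)
    return 0;                        /* Key falls inside this entry.  */
  return (*key < memb->base) ? -1 : 1;
}

/* Usage: with TABLE sorted by BASE and the ranges non-overlapping,
     struct code_range *hit = bsearch (&code, table, n, sizeof table[0],
                                       code_range_compare);
   yields the entry whose range contains CODE, or NULL if none does.  */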
15378 static enum insn_code
15379 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15381 neon_builtin_datum key, *found;
15382 int idx;
15384 key.base_fcode = fcode;
15385 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15386 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15387 gcc_assert (found);
15388 idx = fcode - (int) found->base_fcode;
15389 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15391 if (itype)
15392 *itype = found->itype;
15394 return found->codes[idx];
15397 typedef enum {
15398 NEON_ARG_COPY_TO_REG,
15399 NEON_ARG_CONSTANT,
15400 NEON_ARG_STOP
15401 } builtin_arg;
15403 #define NEON_MAX_BUILTIN_ARGS 5
15405 /* Expand a Neon builtin. */
15406 static rtx
15407 arm_expand_neon_args (rtx target, int icode, int have_retval,
15408 tree exp, ...)
15410 va_list ap;
15411 rtx pat;
15412 tree arg[NEON_MAX_BUILTIN_ARGS];
15413 rtx op[NEON_MAX_BUILTIN_ARGS];
15414 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15415 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15416 int argc = 0;
15418 if (have_retval
15419 && (!target
15420 || GET_MODE (target) != tmode
15421 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15422 target = gen_reg_rtx (tmode);
15424 va_start (ap, exp);
15426 for (;;)
15428 builtin_arg thisarg = va_arg (ap, int);
15430 if (thisarg == NEON_ARG_STOP)
15431 break;
15432 else
15434 arg[argc] = CALL_EXPR_ARG (exp, argc);
15435 op[argc] = expand_normal (arg[argc]);
15436 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15438 switch (thisarg)
15440 case NEON_ARG_COPY_TO_REG:
15441 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15442 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15443 (op[argc], mode[argc]))
15444 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15445 break;
15447 case NEON_ARG_CONSTANT:
15448 /* FIXME: This error message is somewhat unhelpful. */
15449 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15450 (op[argc], mode[argc]))
15451 error ("argument must be a constant");
15452 break;
15454 case NEON_ARG_STOP:
15455 gcc_unreachable ();
15458 argc++;
15462 va_end (ap);
15464 if (have_retval)
15465 switch (argc)
15467 case 1:
15468 pat = GEN_FCN (icode) (target, op[0]);
15469 break;
15471 case 2:
15472 pat = GEN_FCN (icode) (target, op[0], op[1]);
15473 break;
15475 case 3:
15476 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15477 break;
15479 case 4:
15480 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15481 break;
15483 case 5:
15484 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15485 break;
15487 default:
15488 gcc_unreachable ();
15490 else
15491 switch (argc)
15493 case 1:
15494 pat = GEN_FCN (icode) (op[0]);
15495 break;
15497 case 2:
15498 pat = GEN_FCN (icode) (op[0], op[1]);
15499 break;
15501 case 3:
15502 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15503 break;
15505 case 4:
15506 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15507 break;
15509 case 5:
15510 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15511 break;
15513 default:
15514 gcc_unreachable ();
15517 if (!pat)
15518 return 0;
15520 emit_insn (pat);
15522 return target;
15525 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15526 constants defined per-instruction or per instruction-variant. Instead, the
15527 required info is looked up in the table neon_builtin_data. */
15528 static rtx
15529 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15531 neon_itype itype;
15532 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15534 switch (itype)
15536 case NEON_UNOP:
15537 case NEON_CONVERT:
15538 case NEON_DUPLANE:
15539 return arm_expand_neon_args (target, icode, 1, exp,
15540 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15542 case NEON_BINOP:
15543 case NEON_SETLANE:
15544 case NEON_SCALARMUL:
15545 case NEON_SCALARMULL:
15546 case NEON_SCALARMULH:
15547 case NEON_SHIFTINSERT:
15548 case NEON_LOGICBINOP:
15549 return arm_expand_neon_args (target, icode, 1, exp,
15550 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15551 NEON_ARG_STOP);
15553 case NEON_TERNOP:
15554 return arm_expand_neon_args (target, icode, 1, exp,
15555 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15556 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15558 case NEON_GETLANE:
15559 case NEON_FIXCONV:
15560 case NEON_SHIFTIMM:
15561 return arm_expand_neon_args (target, icode, 1, exp,
15562 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15563 NEON_ARG_STOP);
15565 case NEON_CREATE:
15566 return arm_expand_neon_args (target, icode, 1, exp,
15567 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15569 case NEON_DUP:
15570 case NEON_SPLIT:
15571 case NEON_REINTERP:
15572 return arm_expand_neon_args (target, icode, 1, exp,
15573 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15575 case NEON_COMBINE:
15576 case NEON_VTBL:
15577 return arm_expand_neon_args (target, icode, 1, exp,
15578 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15580 case NEON_RESULTPAIR:
15581 return arm_expand_neon_args (target, icode, 0, exp,
15582 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15583 NEON_ARG_STOP);
15585 case NEON_LANEMUL:
15586 case NEON_LANEMULL:
15587 case NEON_LANEMULH:
15588 return arm_expand_neon_args (target, icode, 1, exp,
15589 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15590 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15592 case NEON_LANEMAC:
15593 return arm_expand_neon_args (target, icode, 1, exp,
15594 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15595 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15597 case NEON_SHIFTACC:
15598 return arm_expand_neon_args (target, icode, 1, exp,
15599 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15600 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15602 case NEON_SCALARMAC:
15603 return arm_expand_neon_args (target, icode, 1, exp,
15604 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15605 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15607 case NEON_SELECT:
15608 case NEON_VTBX:
15609 return arm_expand_neon_args (target, icode, 1, exp,
15610 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15611 NEON_ARG_STOP);
15613 case NEON_LOAD1:
15614 case NEON_LOADSTRUCT:
15615 return arm_expand_neon_args (target, icode, 1, exp,
15616 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15618 case NEON_LOAD1LANE:
15619 case NEON_LOADSTRUCTLANE:
15620 return arm_expand_neon_args (target, icode, 1, exp,
15621 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15622 NEON_ARG_STOP);
15624 case NEON_STORE1:
15625 case NEON_STORESTRUCT:
15626 return arm_expand_neon_args (target, icode, 0, exp,
15627 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15629 case NEON_STORE1LANE:
15630 case NEON_STORESTRUCTLANE:
15631 return arm_expand_neon_args (target, icode, 0, exp,
15632 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15633 NEON_ARG_STOP);
15636 gcc_unreachable ();
15639 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15640 void
15641 neon_reinterpret (rtx dest, rtx src)
15643 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15646 /* Emit code to place a Neon pair result in memory locations (with equal
15647 registers). */
15648 void
15649 neon_emit_pair_result_insn (enum machine_mode mode,
15650 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15651 rtx op1, rtx op2)
15653 rtx mem = gen_rtx_MEM (mode, destaddr);
15654 rtx tmp1 = gen_reg_rtx (mode);
15655 rtx tmp2 = gen_reg_rtx (mode);
15657 emit_insn (intfn (tmp1, op1, tmp2, op2));
15659 emit_move_insn (mem, tmp1);
15660 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15661 emit_move_insn (mem, tmp2);
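/* Illustrative note (ours): because the second store goes through
   adjust_address with GET_MODE_SIZE (mode), a V2SImode pair (8 bytes
   per element) ends up at [destaddr] and [destaddr + 8].  */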
15664 /* Set up operands for a register copy from src to dest, taking care not to
15665 clobber registers in the process.
15666 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15667 be called with a large N, so that should be OK. */
15669 void
15670 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15672 unsigned int copied = 0, opctr = 0;
15673 unsigned int done = (1 << count) - 1;
15674 unsigned int i, j;
15676 while (copied != done)
15678 for (i = 0; i < count; i++)
15680 int good = 1;
15682 for (j = 0; good && j < count; j++)
15683 if (i != j && (copied & (1 << j)) == 0
15684 && reg_overlap_mentioned_p (src[j], dest[i]))
15685 good = 0;
15687 if (good)
15689 operands[opctr++] = dest[i];
15690 operands[opctr++] = src[i];
15691 copied |= 1 << i;
15696 gcc_assert (opctr == count * 2);
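/* Editorial sketch (not GCC code): the same scheduling loop on plain
   integer "register numbers".  A copy is emitted once its destination
   is no longer needed as the source of a pending copy; an explicit
   already-emitted guard is added here for clarity.  For dest = {2, 3}
   and src = {1, 2}, emitting 1->2 first would clobber the source of
   2->3, so the loop orders them 2->3 then 1->2.  As with the FIXME
   above, a cyclic dependency (e.g. a swap) would never terminate, so
   callers must not create one.  */

static void
order_copies (const int *dest, const int *src, unsigned int count,
              int *out /* receives COUNT (dest, src) pairs */)
{
  unsigned int copied = 0, opctr = 0;
  unsigned int done = (1u << count) - 1;
  unsigned int i, j;

  while (copied != done)
    for (i = 0; i < count; i++)
      {
        int good = 1;

        if (copied & (1u << i))
          continue;                  /* Already emitted.  */

        for (j = 0; good && j < count; j++)
          if (i != j && (copied & (1u << j)) == 0 && src[j] == dest[i])
            good = 0;                /* Destination still feeds a pending copy.  */

        if (good)
          {
            out[opctr++] = dest[i];
            out[opctr++] = src[i];
            copied |= 1u << i;
          }
      }
}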
15699 /* Expand an expression EXP that calls a built-in function,
15700 with result going to TARGET if that's convenient
15701 (and in mode MODE if that's convenient).
15702 SUBTARGET may be used as the target for computing one of EXP's operands.
15703 IGNORE is nonzero if the value is to be ignored. */
15705 static rtx
15706 arm_expand_builtin (tree exp,
15707 rtx target,
15708 rtx subtarget ATTRIBUTE_UNUSED,
15709 enum machine_mode mode ATTRIBUTE_UNUSED,
15710 int ignore ATTRIBUTE_UNUSED)
15712 const struct builtin_description * d;
15713 enum insn_code icode;
15714 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15715 tree arg0;
15716 tree arg1;
15717 tree arg2;
15718 rtx op0;
15719 rtx op1;
15720 rtx op2;
15721 rtx pat;
15722 int fcode = DECL_FUNCTION_CODE (fndecl);
15723 size_t i;
15724 enum machine_mode tmode;
15725 enum machine_mode mode0;
15726 enum machine_mode mode1;
15727 enum machine_mode mode2;
15729 if (fcode >= ARM_BUILTIN_NEON_BASE)
15730 return arm_expand_neon_builtin (fcode, exp, target);
15732 switch (fcode)
15734 case ARM_BUILTIN_TEXTRMSB:
15735 case ARM_BUILTIN_TEXTRMUB:
15736 case ARM_BUILTIN_TEXTRMSH:
15737 case ARM_BUILTIN_TEXTRMUH:
15738 case ARM_BUILTIN_TEXTRMSW:
15739 case ARM_BUILTIN_TEXTRMUW:
15740 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15741 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15742 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15743 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15744 : CODE_FOR_iwmmxt_textrmw);
15746 arg0 = CALL_EXPR_ARG (exp, 0);
15747 arg1 = CALL_EXPR_ARG (exp, 1);
15748 op0 = expand_normal (arg0);
15749 op1 = expand_normal (arg1);
15750 tmode = insn_data[icode].operand[0].mode;
15751 mode0 = insn_data[icode].operand[1].mode;
15752 mode1 = insn_data[icode].operand[2].mode;
15754 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15755 op0 = copy_to_mode_reg (mode0, op0);
15756 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15758 /* @@@ better error message */
15759 error ("selector must be an immediate");
15760 return gen_reg_rtx (tmode);
15762 if (target == 0
15763 || GET_MODE (target) != tmode
15764 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15765 target = gen_reg_rtx (tmode);
15766 pat = GEN_FCN (icode) (target, op0, op1);
15767 if (! pat)
15768 return 0;
15769 emit_insn (pat);
15770 return target;
15772 case ARM_BUILTIN_TINSRB:
15773 case ARM_BUILTIN_TINSRH:
15774 case ARM_BUILTIN_TINSRW:
15775 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15776 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15777 : CODE_FOR_iwmmxt_tinsrw);
15778 arg0 = CALL_EXPR_ARG (exp, 0);
15779 arg1 = CALL_EXPR_ARG (exp, 1);
15780 arg2 = CALL_EXPR_ARG (exp, 2);
15781 op0 = expand_normal (arg0);
15782 op1 = expand_normal (arg1);
15783 op2 = expand_normal (arg2);
15784 tmode = insn_data[icode].operand[0].mode;
15785 mode0 = insn_data[icode].operand[1].mode;
15786 mode1 = insn_data[icode].operand[2].mode;
15787 mode2 = insn_data[icode].operand[3].mode;
15789 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15790 op0 = copy_to_mode_reg (mode0, op0);
15791 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15792 op1 = copy_to_mode_reg (mode1, op1);
15793 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15795 /* @@@ better error message */
15796 error ("selector must be an immediate");
15797 return const0_rtx;
15799 if (target == 0
15800 || GET_MODE (target) != tmode
15801 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15802 target = gen_reg_rtx (tmode);
15803 pat = GEN_FCN (icode) (target, op0, op1, op2);
15804 if (! pat)
15805 return 0;
15806 emit_insn (pat);
15807 return target;
15809 case ARM_BUILTIN_SETWCX:
15810 arg0 = CALL_EXPR_ARG (exp, 0);
15811 arg1 = CALL_EXPR_ARG (exp, 1);
15812 op0 = force_reg (SImode, expand_normal (arg0));
15813 op1 = expand_normal (arg1);
15814 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15815 return 0;
15817 case ARM_BUILTIN_GETWCX:
15818 arg0 = CALL_EXPR_ARG (exp, 0);
15819 op0 = expand_normal (arg0);
15820 target = gen_reg_rtx (SImode);
15821 emit_insn (gen_iwmmxt_tmrc (target, op0));
15822 return target;
15824 case ARM_BUILTIN_WSHUFH:
15825 icode = CODE_FOR_iwmmxt_wshufh;
15826 arg0 = CALL_EXPR_ARG (exp, 0);
15827 arg1 = CALL_EXPR_ARG (exp, 1);
15828 op0 = expand_normal (arg0);
15829 op1 = expand_normal (arg1);
15830 tmode = insn_data[icode].operand[0].mode;
15831 mode1 = insn_data[icode].operand[1].mode;
15832 mode2 = insn_data[icode].operand[2].mode;
15834 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15835 op0 = copy_to_mode_reg (mode1, op0);
15836 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15838 /* @@@ better error message */
15839 error ("mask must be an immediate");
15840 return const0_rtx;
15842 if (target == 0
15843 || GET_MODE (target) != tmode
15844 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15845 target = gen_reg_rtx (tmode);
15846 pat = GEN_FCN (icode) (target, op0, op1);
15847 if (! pat)
15848 return 0;
15849 emit_insn (pat);
15850 return target;
15852 case ARM_BUILTIN_WSADB:
15853 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15854 case ARM_BUILTIN_WSADH:
15855 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15856 case ARM_BUILTIN_WSADBZ:
15857 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15858 case ARM_BUILTIN_WSADHZ:
15859 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15861 /* Several three-argument builtins. */
15862 case ARM_BUILTIN_WMACS:
15863 case ARM_BUILTIN_WMACU:
15864 case ARM_BUILTIN_WALIGN:
15865 case ARM_BUILTIN_TMIA:
15866 case ARM_BUILTIN_TMIAPH:
15867 case ARM_BUILTIN_TMIATT:
15868 case ARM_BUILTIN_TMIATB:
15869 case ARM_BUILTIN_TMIABT:
15870 case ARM_BUILTIN_TMIABB:
15871 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
15872 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
15873 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
15874 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
15875 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
15876 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
15877 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
15878 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
15879 : CODE_FOR_iwmmxt_walign);
15880 arg0 = CALL_EXPR_ARG (exp, 0);
15881 arg1 = CALL_EXPR_ARG (exp, 1);
15882 arg2 = CALL_EXPR_ARG (exp, 2);
15883 op0 = expand_normal (arg0);
15884 op1 = expand_normal (arg1);
15885 op2 = expand_normal (arg2);
15886 tmode = insn_data[icode].operand[0].mode;
15887 mode0 = insn_data[icode].operand[1].mode;
15888 mode1 = insn_data[icode].operand[2].mode;
15889 mode2 = insn_data[icode].operand[3].mode;
15891 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15892 op0 = copy_to_mode_reg (mode0, op0);
15893 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15894 op1 = copy_to_mode_reg (mode1, op1);
15895 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15896 op2 = copy_to_mode_reg (mode2, op2);
15897 if (target == 0
15898 || GET_MODE (target) != tmode
15899 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15900 target = gen_reg_rtx (tmode);
15901 pat = GEN_FCN (icode) (target, op0, op1, op2);
15902 if (! pat)
15903 return 0;
15904 emit_insn (pat);
15905 return target;
15907 case ARM_BUILTIN_WZERO:
15908 target = gen_reg_rtx (DImode);
15909 emit_insn (gen_iwmmxt_clrdi (target));
15910 return target;
15912 case ARM_BUILTIN_THREAD_POINTER:
15913 return arm_load_tp (target);
15915 default:
15916 break;
15919 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15920 if (d->code == (const enum arm_builtins) fcode)
15921 return arm_expand_binop_builtin (d->icode, exp, target);
15923 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15924 if (d->code == (const enum arm_builtins) fcode)
15925 return arm_expand_unop_builtin (d->icode, exp, target, 0);
15927 /* @@@ Should really do something sensible here. */
15928 return NULL_RTX;
15931 /* Return the number (counting from 0) of
15932 the least significant set bit in MASK. */
15934 inline static int
15935 number_of_first_bit_set (unsigned mask)
15937 int bit;
15939 for (bit = 0;
15940 (mask & (1 << bit)) == 0;
15941 ++bit)
15942 continue;
15944 return bit;
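/* Worked example (ours): number_of_first_bit_set (0x28) == 3, since
   0x28 is binary 101000 and bit 3 is the lowest bit set.  On GCC
   hosts the same value could be computed with __builtin_ctz (mask);
   either way the result is undefined when MASK is zero.  */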
15947 /* Emit code to push or pop registers to or from the stack. F is the
15948 assembly file. MASK is the registers to push or pop. PUSH is
15949 nonzero if we should push, and zero if we should pop. For debugging
15950 output, if pushing, adjust CFA_OFFSET by the amount of space added
15951 to the stack. REAL_REGS should have the same number of bits set as
15952 MASK, and will be used instead (in the same order) to describe which
15953 registers were saved - this is used to mark the save slots when we
15954 push high registers after moving them to low registers. */
15955 static void
15956 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
15957 unsigned long real_regs)
15959 int regno;
15960 int lo_mask = mask & 0xFF;
15961 int pushed_words = 0;
15963 gcc_assert (mask);
15965 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
15967 /* Special case. Do not generate a POP PC statement here, do it in
15968 thumb_exit(). */
15969 thumb_exit (f, -1);
15970 return;
15973 if (ARM_EABI_UNWIND_TABLES && push)
15975 fprintf (f, "\t.save\t{");
15976 for (regno = 0; regno < 15; regno++)
15978 if (real_regs & (1 << regno))
15980 if (real_regs & ((1 << regno) -1))
15981 fprintf (f, ", ");
15982 asm_fprintf (f, "%r", regno);
15985 fprintf (f, "}\n");
15988 fprintf (f, "\t%s\t{", push ? "push" : "pop");
15990 /* Look at the low registers first. */
15991 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
15993 if (lo_mask & 1)
15995 asm_fprintf (f, "%r", regno);
15997 if ((lo_mask & ~1) != 0)
15998 fprintf (f, ", ");
16000 pushed_words++;
16004 if (push && (mask & (1 << LR_REGNUM)))
16006 /* Catch pushing the LR. */
16007 if (mask & 0xFF)
16008 fprintf (f, ", ");
16010 asm_fprintf (f, "%r", LR_REGNUM);
16012 pushed_words++;
16014 else if (!push && (mask & (1 << PC_REGNUM)))
16016 /* Catch popping the PC. */
16017 if (TARGET_INTERWORK || TARGET_BACKTRACE
16018 || current_function_calls_eh_return)
16020 /* The PC is never popped directly; instead
16021 it is popped into r3 and then BX is used. */
16022 fprintf (f, "}\n");
16024 thumb_exit (f, -1);
16026 return;
16028 else
16030 if (mask & 0xFF)
16031 fprintf (f, ", ");
16033 asm_fprintf (f, "%r", PC_REGNUM);
16037 fprintf (f, "}\n");
16039 if (push && pushed_words && dwarf2out_do_frame ())
16041 char *l = dwarf2out_cfi_label ();
16042 int pushed_mask = real_regs;
16044 *cfa_offset += pushed_words * 4;
16045 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16047 pushed_words = 0;
16048 pushed_mask = real_regs;
16049 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16051 if (pushed_mask & 1)
16052 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
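/* Worked example (ours): thumb_pushpop (f, 0x4090, 1, &offset, 0x4090),
   i.e. bits 4, 7 and LR set, emits

	.save	{r4, r7, lr}		(EABI unwind tables only)
	push	{r4, r7, lr}

   and, when frame debug info is being generated, advances *offset by
   12 for the three pushed words.  */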
16057 /* Generate code to return from a thumb function.
16058 If 'reg_containing_return_addr' is -1, then the return address is
16059 actually on the stack, at the stack pointer. */
16060 static void
16061 thumb_exit (FILE *f, int reg_containing_return_addr)
16063 unsigned regs_available_for_popping;
16064 unsigned regs_to_pop;
16065 int pops_needed;
16066 unsigned available;
16067 unsigned required;
16068 int mode;
16069 int size;
16070 int restore_a4 = FALSE;
16072 /* Compute the registers we need to pop. */
16073 regs_to_pop = 0;
16074 pops_needed = 0;
16076 if (reg_containing_return_addr == -1)
16078 regs_to_pop |= 1 << LR_REGNUM;
16079 ++pops_needed;
16082 if (TARGET_BACKTRACE)
16084 /* Restore the (ARM) frame pointer and stack pointer. */
16085 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16086 pops_needed += 2;
16089 /* If there is nothing to pop then just emit the BX instruction and
16090 return. */
16091 if (pops_needed == 0)
16093 if (current_function_calls_eh_return)
16094 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16096 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16097 return;
16099 /* Otherwise if we are not supporting interworking and we have not created
16100 a backtrace structure and the function was not entered in ARM mode then
16101 just pop the return address straight into the PC. */
16102 else if (!TARGET_INTERWORK
16103 && !TARGET_BACKTRACE
16104 && !is_called_in_ARM_mode (current_function_decl)
16105 && !current_function_calls_eh_return)
16107 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16108 return;
16111 /* Find out how many of the (return) argument registers we can corrupt. */
16112 regs_available_for_popping = 0;
16114 /* If returning via __builtin_eh_return, the bottom three registers
16115 all contain information needed for the return. */
16116 if (current_function_calls_eh_return)
16117 size = 12;
16118 else
16120 /* Deduce the registers used from the function's
16121 return value. This is more reliable than examining
16122 df_regs_ever_live_p () because that will be set if the register is
16123 ever used in the function, not just if the register is used
16124 to hold a return value. */
16126 if (current_function_return_rtx != 0)
16127 mode = GET_MODE (current_function_return_rtx);
16128 else
16129 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16131 size = GET_MODE_SIZE (mode);
16133 if (size == 0)
16135 /* In a void function we can use any argument register.
16136 In a function that returns a structure on the stack
16137 we can use the second and third argument registers. */
16138 if (mode == VOIDmode)
16139 regs_available_for_popping =
16140 (1 << ARG_REGISTER (1))
16141 | (1 << ARG_REGISTER (2))
16142 | (1 << ARG_REGISTER (3));
16143 else
16144 regs_available_for_popping =
16145 (1 << ARG_REGISTER (2))
16146 | (1 << ARG_REGISTER (3));
16148 else if (size <= 4)
16149 regs_available_for_popping =
16150 (1 << ARG_REGISTER (2))
16151 | (1 << ARG_REGISTER (3));
16152 else if (size <= 8)
16153 regs_available_for_popping =
16154 (1 << ARG_REGISTER (3));
16157 /* Match registers to be popped with registers into which we pop them. */
16158 for (available = regs_available_for_popping,
16159 required = regs_to_pop;
16160 required != 0 && available != 0;
16161 available &= ~(available & - available),
16162 required &= ~(required & - required))
16163 -- pops_needed;
16165 /* If we have any popping registers left over, remove them. */
16166 if (available > 0)
16167 regs_available_for_popping &= ~available;
16169 /* Otherwise if we need another popping register we can use
16170 the fourth argument register. */
16171 else if (pops_needed)
16173 /* If we have not found any free argument registers and
16174 reg a4 contains the return address, we must move it. */
16175 if (regs_available_for_popping == 0
16176 && reg_containing_return_addr == LAST_ARG_REGNUM)
16178 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16179 reg_containing_return_addr = LR_REGNUM;
16181 else if (size > 12)
16183 /* Register a4 is being used to hold part of the return value,
16184 but we have dire need of a free, low register. */
16185 restore_a4 = TRUE;
16187 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16190 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16192 /* The fourth argument register is available. */
16193 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16195 --pops_needed;
16199 /* Pop as many registers as we can. */
16200 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16201 regs_available_for_popping);
16203 /* Process the registers we popped. */
16204 if (reg_containing_return_addr == -1)
16206 /* The return address was popped into the lowest numbered register. */
16207 regs_to_pop &= ~(1 << LR_REGNUM);
16209 reg_containing_return_addr =
16210 number_of_first_bit_set (regs_available_for_popping);
16212 /* Remove this register from the mask of available registers, so that
16213 the return address will not be corrupted by further pops. */
16214 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16217 /* If we popped other registers then handle them here. */
16218 if (regs_available_for_popping)
16220 int frame_pointer;
16222 /* Work out which register currently contains the frame pointer. */
16223 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16225 /* Move it into the correct place. */
16226 asm_fprintf (f, "\tmov\t%r, %r\n",
16227 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16229 /* (Temporarily) remove it from the mask of popped registers. */
16230 regs_available_for_popping &= ~(1 << frame_pointer);
16231 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16233 if (regs_available_for_popping)
16235 int stack_pointer;
16237 /* We popped the stack pointer as well;
16238 find the register that contains it. */
16239 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16241 /* Move it into the stack register. */
16242 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16244 /* At this point we have popped all necessary registers, so
16245 do not worry about restoring regs_available_for_popping
16246 to its correct value:
16248 assert (pops_needed == 0)
16249 assert (regs_available_for_popping == (1 << frame_pointer))
16250 assert (regs_to_pop == (1 << STACK_POINTER)) */
16252 else
16254 /* Since we have just moved the popped value into the frame
16255 pointer, the popping register is available for reuse, and
16256 we know that we still have the stack pointer left to pop. */
16257 regs_available_for_popping |= (1 << frame_pointer);
16261 /* If we still have registers left on the stack, but we no longer have
16262 any registers into which we can pop them, then we must move the return
16263 address into the link register and make available the register that
16264 contained it. */
16265 if (regs_available_for_popping == 0 && pops_needed > 0)
16267 regs_available_for_popping |= 1 << reg_containing_return_addr;
16269 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16270 reg_containing_return_addr);
16272 reg_containing_return_addr = LR_REGNUM;
16275 /* If we have registers left on the stack then pop some more.
16276 We know that at most we will want to pop FP and SP. */
16277 if (pops_needed > 0)
16279 int popped_into;
16280 int move_to;
16282 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16283 regs_available_for_popping);
16285 /* We have popped either FP or SP.
16286 Move whichever one it is into the correct register. */
16287 popped_into = number_of_first_bit_set (regs_available_for_popping);
16288 move_to = number_of_first_bit_set (regs_to_pop);
16290 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16292 regs_to_pop &= ~(1 << move_to);
16294 --pops_needed;
16297 /* If we still have not popped everything then we must have only
16298 had one register available to us and we are now popping the SP. */
16299 if (pops_needed > 0)
16301 int popped_into;
16303 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16304 regs_available_for_popping);
16306 popped_into = number_of_first_bit_set (regs_available_for_popping);
16308 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16310 /* assert (regs_to_pop == (1 << STACK_POINTER))
16311    assert (pops_needed == 1) */
16315 /* If necessary restore the a4 register. */
16316 if (restore_a4)
16318 if (reg_containing_return_addr != LR_REGNUM)
16320 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16321 reg_containing_return_addr = LR_REGNUM;
16324 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16327 if (current_function_calls_eh_return)
16328 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16330 /* Return to caller. */
16331 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16335 void
16336 thumb1_final_prescan_insn (rtx insn)
16338 if (flag_print_asm_name)
16339 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16340 INSN_ADDRESSES (INSN_UID (insn)));
16343 int
16344 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16346 unsigned HOST_WIDE_INT mask = 0xff;
16347 int i;
16349 if (val == 0) /* XXX */
16350 return 0;
16352 for (i = 0; i < 25; i++)
16353 if ((val & (mask << i)) == val)
16354 return 1;
16356 return 0;
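/* Worked example (ours): 0x1FE00 is 0xFF shifted left by 9, so
   thumb_shiftable_const (0x1FE00) returns 1; 0x101 sets bits 0 and 8,
   which no 8-bit window can cover, so it returns 0.  */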
16359 /* Returns nonzero if the current function contains,
16360 or might contain a far jump. */
16361 static int
16362 thumb_far_jump_used_p (void)
16364 rtx insn;
16366 /* This test is only important for leaf functions. */
16367 /* assert (!leaf_function_p ()); */
16369 /* If we have already decided that far jumps may be used,
16370 do not bother checking again, and always return true even if
16371 it turns out that they are not being used. Once we have made
16372 the decision that far jumps are present (and that hence the link
16373 register will be pushed onto the stack) we cannot go back on it. */
16374 if (cfun->machine->far_jump_used)
16375 return 1;
16377 /* If this function is not being called from the prologue/epilogue
16378 generation code then it must be being called from the
16379 INITIAL_ELIMINATION_OFFSET macro. */
16380 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16382 /* In this case we know that we are being asked about the elimination
16383 of the arg pointer register. If that register is not being used,
16384 then there are no arguments on the stack, and we do not have to
16385 worry that a far jump might force the prologue to push the link
16386 register, changing the stack offsets. In this case we can just
16387 return false, since the presence of far jumps in the function will
16388 not affect stack offsets.
16390 If the arg pointer is live (or if it was live, but has now been
16391 eliminated and so set to dead) then we do have to test to see if
16392 the function might contain a far jump. This test can lead to some
16393 false negatives, since before reload is completed the length of
16394 branch instructions is not known, so gcc defaults to returning their
16395 longest length, which in turn sets the far jump attribute to true.
16397 A false negative will not result in bad code being generated, but it
16398 will result in a needless push and pop of the link register. We
16399 hope that this does not occur too often.
16401 If we need doubleword stack alignment this could affect the other
16402 elimination offsets so we can't risk getting it wrong. */
16403 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16404 cfun->machine->arg_pointer_live = 1;
16405 else if (!cfun->machine->arg_pointer_live)
16406 return 0;
16409 /* Check to see if the function contains a branch
16410 insn with the far jump attribute set. */
16411 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16413 if (GET_CODE (insn) == JUMP_INSN
16414 /* Ignore tablejump patterns. */
16415 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16416 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16417 && get_attr_far_jump (insn) == FAR_JUMP_YES
16420 /* Record the fact that we have decided that
16421 the function does use far jumps. */
16422 cfun->machine->far_jump_used = 1;
16423 return 1;
16427 return 0;
16430 /* Return nonzero if FUNC must be entered in ARM mode. */
16431 int
16432 is_called_in_ARM_mode (tree func)
16434 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16436 /* Ignore the problem about functions whose address is taken. */
16437 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16438 return TRUE;
16440 #ifdef ARM_PE
16441 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16442 #else
16443 return FALSE;
16444 #endif
16447 /* The bits which aren't usefully expanded as rtl. */
16448 const char *
16449 thumb_unexpanded_epilogue (void)
16451 int regno;
16452 unsigned long live_regs_mask = 0;
16453 int high_regs_pushed = 0;
16454 int had_to_push_lr;
16455 int size;
16457 if (return_used_this_function)
16458 return "";
16460 if (IS_NAKED (arm_current_func_type ()))
16461 return "";
16463 live_regs_mask = thumb1_compute_save_reg_mask ();
16464 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16466 /* Deduce the registers used from the function's return value.
16467 This is more reliable than examining df_regs_ever_live_p () because that
16468 will be set if the register is ever used in the function, not just if
16469 the register is used to hold a return value. */
16470 size = arm_size_return_regs ();
16472 /* The prologue may have pushed some high registers to use as
16473 work registers, e.g. the testsuite file:
16474 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16475 compiles to produce:
16476 push {r4, r5, r6, r7, lr}
16477 mov r7, r9
16478 mov r6, r8
16479 push {r6, r7}
16480 as part of the prologue. We have to undo that pushing here. */
16482 if (high_regs_pushed)
16484 unsigned long mask = live_regs_mask & 0xff;
16485 int next_hi_reg;
16487 /* The available low registers depend on the size of the value we are
16488 returning. */
16489 if (size <= 12)
16490 mask |= 1 << 3;
16491 if (size <= 8)
16492 mask |= 1 << 2;
16494 if (mask == 0)
16495 /* Oh dear! We have no low registers into which we can pop
16496 high registers! */
16497 internal_error
16498 ("no low registers available for popping high registers");
16500 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16501 if (live_regs_mask & (1 << next_hi_reg))
16502 break;
16504 while (high_regs_pushed)
16506 /* Find lo register(s) into which the high register(s) can
16507 be popped. */
16508 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16510 if (mask & (1 << regno))
16511 high_regs_pushed--;
16512 if (high_regs_pushed == 0)
16513 break;
16516 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16518 /* Pop the values into the low register(s). */
16519 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16521 /* Move the value(s) into the high registers. */
16522 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16524 if (mask & (1 << regno))
16526 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16527 regno);
16529 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16530 if (live_regs_mask & (1 << next_hi_reg))
16531 break;
16535 live_regs_mask &= ~0x0f00;
16538 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16539 live_regs_mask &= 0xff;
16541 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16543 /* Pop the return address into the PC. */
16544 if (had_to_push_lr)
16545 live_regs_mask |= 1 << PC_REGNUM;
16547 /* Either no argument registers were pushed or a backtrace
16548 structure was created which includes an adjusted stack
16549 pointer, so just pop everything. */
16550 if (live_regs_mask)
16551 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16552 live_regs_mask);
16554 /* We have either just popped the return address into the
16555 PC or it was kept in LR for the entire function. */
16556 if (!had_to_push_lr)
16557 thumb_exit (asm_out_file, LR_REGNUM);
16559 else
16561 /* Pop everything but the return address. */
16562 if (live_regs_mask)
16563 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16564 live_regs_mask);
16566 if (had_to_push_lr)
16568 if (size > 12)
16570 /* We have no free low regs, so save one. */
16571 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16572 LAST_ARG_REGNUM);
16575 /* Get the return address into a temporary register. */
16576 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16577 1 << LAST_ARG_REGNUM);
16579 if (size > 12)
16581 /* Move the return address to lr. */
16582 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16583 LAST_ARG_REGNUM);
16584 /* Restore the low register. */
16585 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16586 IP_REGNUM);
16587 regno = LR_REGNUM;
16589 else
16590 regno = LAST_ARG_REGNUM;
16592 else
16593 regno = LR_REGNUM;
16595 /* Remove the argument registers that were pushed onto the stack. */
16596 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16597 SP_REGNUM, SP_REGNUM,
16598 current_function_pretend_args_size);
16600 thumb_exit (asm_out_file, regno);
16603 return "";
16606 /* Functions to save and restore machine-specific function data. */
16607 static struct machine_function *
16608 arm_init_machine_status (void)
16610 struct machine_function *machine;
16611 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16613 #if ARM_FT_UNKNOWN != 0
16614 machine->func_type = ARM_FT_UNKNOWN;
16615 #endif
16616 return machine;
16619 /* Return an RTX indicating where the return address to the
16620 calling function can be found. */
16621 rtx
16622 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16624 if (count != 0)
16625 return NULL_RTX;
16627 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16630 /* Do anything needed before RTL is emitted for each function. */
16631 void
16632 arm_init_expanders (void)
16634 /* Arrange to initialize and mark the machine per-function status. */
16635 init_machine_status = arm_init_machine_status;
16637 /* This is to stop the combine pass optimizing away the alignment
16638 adjustment of va_arg. */
16639 /* ??? It is claimed that this should not be necessary. */
16640 if (cfun)
16641 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16645 /* Like arm_compute_initial_elimination_offset. Simpler because there
16646 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
16647 to point at the base of the local variables after static stack
16648 space for a function has been allocated. */
16650 HOST_WIDE_INT
16651 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16653 arm_stack_offsets *offsets;
16655 offsets = arm_get_frame_offsets ();
16657 switch (from)
16659 case ARG_POINTER_REGNUM:
16660 switch (to)
16662 case STACK_POINTER_REGNUM:
16663 return offsets->outgoing_args - offsets->saved_args;
16665 case FRAME_POINTER_REGNUM:
16666 return offsets->soft_frame - offsets->saved_args;
16668 case ARM_HARD_FRAME_POINTER_REGNUM:
16669 return offsets->saved_regs - offsets->saved_args;
16671 case THUMB_HARD_FRAME_POINTER_REGNUM:
16672 return offsets->locals_base - offsets->saved_args;
16674 default:
16675 gcc_unreachable ();
16677 break;
16679 case FRAME_POINTER_REGNUM:
16680 switch (to)
16682 case STACK_POINTER_REGNUM:
16683 return offsets->outgoing_args - offsets->soft_frame;
16685 case ARM_HARD_FRAME_POINTER_REGNUM:
16686 return offsets->saved_regs - offsets->soft_frame;
16688 case THUMB_HARD_FRAME_POINTER_REGNUM:
16689 return offsets->locals_base - offsets->soft_frame;
16691 default:
16692 gcc_unreachable ();
16694 break;
16696 default:
16697 gcc_unreachable ();
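/* Worked example with made-up offsets (ours): if saved_args == 0,
   saved_regs == 16, soft_frame == 16, locals_base == 24 and
   outgoing_args == 40, then eliminating ARG_POINTER_REGNUM into
   STACK_POINTER_REGNUM yields 40 - 0 = 40, and FRAME_POINTER_REGNUM
   into STACK_POINTER_REGNUM yields 40 - 16 = 24.  */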
16701 /* Generate the rest of a function's prologue. */
16702 void
16703 thumb1_expand_prologue (void)
16705 rtx insn, dwarf;
16707 HOST_WIDE_INT amount;
16708 arm_stack_offsets *offsets;
16709 unsigned long func_type;
16710 int regno;
16711 unsigned long live_regs_mask;
16713 func_type = arm_current_func_type ();
16715 /* Naked functions don't have prologues. */
16716 if (IS_NAKED (func_type))
16717 return;
16719 if (IS_INTERRUPT (func_type))
16721 error ("interrupt Service Routines cannot be coded in Thumb mode");
16722 return;
16725 live_regs_mask = thumb1_compute_save_reg_mask ();
16726 /* Load the pic register before setting the frame pointer,
16727 so we can use r7 as a temporary work register. */
16728 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16729 arm_load_pic_register (live_regs_mask);
16731 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16732 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16733 stack_pointer_rtx);
16735 offsets = arm_get_frame_offsets ();
16736 amount = offsets->outgoing_args - offsets->saved_regs;
16737 if (amount)
16739 if (amount < 512)
16741 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16742 GEN_INT (- amount)));
16743 RTX_FRAME_RELATED_P (insn) = 1;
16745 else
16747 rtx reg;
16749 /* The stack decrement is too big for an immediate value in a single
16750 insn. In theory we could issue multiple subtracts, but after
16751 three of them it becomes more space efficient to place the full
16752 value in the constant pool and load into a register. (Also the
16753 ARM debugger really likes to see only one stack decrement per
16754 function). So instead we look for a scratch register into which
16755 we can load the decrement, and then we subtract this from the
16756 stack pointer. Unfortunately on the thumb the only available
16757 scratch registers are the argument registers, and we cannot use
16758 these as they may hold arguments to the function. Instead we
16759 attempt to locate a call preserved register which is used by this
16760 function. If we can find one, then we know that it will have
16761 been pushed at the start of the prologue and so we can corrupt
16762 it now. */
16763 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16764 if (live_regs_mask & (1 << regno)
16765 && !(frame_pointer_needed
16766 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16767 break;
16769 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16771 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16773 /* Choose an arbitrary, non-argument low register. */
16774 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16776 /* Save it by copying it into a high, scratch register. */
16777 emit_insn (gen_movsi (spare, reg));
16778 /* Add a USE to stop propagate_one_insn() from barfing. */
16779 emit_insn (gen_prologue_use (spare));
16781 /* Decrement the stack. */
16782 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16783 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16784 stack_pointer_rtx, reg));
16785 RTX_FRAME_RELATED_P (insn) = 1;
16786 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16787 plus_constant (stack_pointer_rtx,
16788 -amount));
16789 RTX_FRAME_RELATED_P (dwarf) = 1;
16790 REG_NOTES (insn)
16791 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16792 REG_NOTES (insn));
16794 /* Restore the low register's original value. */
16795 emit_insn (gen_movsi (reg, spare));
16797 /* Emit a USE of the restored scratch register, so that flow
16798 analysis will not consider the restore redundant. The
16799 register won't be used again in this function and isn't
16800 restored by the epilogue. */
16801 emit_insn (gen_prologue_use (reg));
16803 else
16805 reg = gen_rtx_REG (SImode, regno);
16807 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16809 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16810 stack_pointer_rtx, reg));
16811 RTX_FRAME_RELATED_P (insn) = 1;
16812 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16813 plus_constant (stack_pointer_rtx,
16814 -amount));
16815 RTX_FRAME_RELATED_P (dwarf) = 1;
16816 REG_NOTES (insn)
16817 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16818 REG_NOTES (insn));
16823 if (frame_pointer_needed)
16824 thumb_set_frame_pointer (offsets);
16826 /* If we are profiling, make sure no instructions are scheduled before
16827 the call to mcount. Similarly if the user has requested no
16828 scheduling in the prologue. Similarly if we want non-call exceptions
16829 using the EABI unwinder, to prevent faulting instructions from being
16830 swapped with a stack adjustment. */
16831 if (current_function_profile || !TARGET_SCHED_PROLOG
16832 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16833 emit_insn (gen_blockage ());
16835 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16836 if (live_regs_mask & 0xff)
16837 cfun->machine->lr_save_eliminated = 0;
16841 void
16842 thumb1_expand_epilogue (void)
16844 HOST_WIDE_INT amount;
16845 arm_stack_offsets *offsets;
16846 int regno;
16848 /* Naked functions don't have epilogues. */
16849 if (IS_NAKED (arm_current_func_type ()))
16850 return;
16852 offsets = arm_get_frame_offsets ();
16853 amount = offsets->outgoing_args - offsets->saved_regs;
16855 if (frame_pointer_needed)
16857 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16858 amount = offsets->locals_base - offsets->saved_regs;
16861 gcc_assert (amount >= 0);
16862 if (amount)
16864 if (amount < 512)
16865 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16866 GEN_INT (amount)));
16867 else
16869 /* r3 is always free in the epilogue. */
16870 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
16872 emit_insn (gen_movsi (reg, GEN_INT (amount)));
16873 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
16877 /* Emit a USE (stack_pointer_rtx), so that
16878 the stack adjustment will not be deleted. */
16879 emit_insn (gen_prologue_use (stack_pointer_rtx));
16881 if (current_function_profile || !TARGET_SCHED_PROLOG)
16882 emit_insn (gen_blockage ());
16884 /* Emit a clobber for each insn that will be restored in the epilogue,
16885 so that flow2 will get register lifetimes correct. */
16886 for (regno = 0; regno < 13; regno++)
16887 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
16888 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
16890 if (! df_regs_ever_live_p (LR_REGNUM))
16891 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
16894 static void
16895 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
16897 unsigned long live_regs_mask = 0;
16898 unsigned long l_mask;
16899 unsigned high_regs_pushed = 0;
16900 int cfa_offset = 0;
16901 int regno;
16903 if (IS_NAKED (arm_current_func_type ()))
16904 return;
16906 if (is_called_in_ARM_mode (current_function_decl))
16908 const char * name;
16910 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
16911 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
16912 == SYMBOL_REF);
16913 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
16915 /* Generate code sequence to switch us into Thumb mode. */
16916 /* The .code 32 directive has already been emitted by
16917 ASM_DECLARE_FUNCTION_NAME. */
16918 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
16919 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
16921 /* Generate a label, so that the debugger will notice the
16922 change in instruction sets. This label is also used by
16923 the assembler to bypass the ARM code when this function
16924 is called from a Thumb encoded function elsewhere in the
16925 same file. Hence the definition of STUB_NAME here must
16926 agree with the definition in gas/config/tc-arm.c. */
16928 #define STUB_NAME ".real_start_of"
16930 fprintf (f, "\t.code\t16\n");
16931 #ifdef ARM_PE
16932 if (arm_dllexport_name_p (name))
16933 name = arm_strip_name_encoding (name);
16934 #endif
16935 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
16936 fprintf (f, "\t.thumb_func\n");
16937 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
16940 if (current_function_pretend_args_size)
16942 /* Output unwind directive for the stack adjustment. */
16943 if (ARM_EABI_UNWIND_TABLES)
16944 fprintf (f, "\t.pad #%d\n",
16945 current_function_pretend_args_size);
16947 if (cfun->machine->uses_anonymous_args)
16949 int num_pushes;
16951 fprintf (f, "\tpush\t{");
16953 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
16955 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
16956 regno <= LAST_ARG_REGNUM;
16957 regno++)
16958 asm_fprintf (f, "%r%s", regno,
16959 regno == LAST_ARG_REGNUM ? "" : ", ");
16961 fprintf (f, "}\n");
16963 else
16964 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
16965 SP_REGNUM, SP_REGNUM,
16966 current_function_pretend_args_size);
16968 /* We don't need to record the stores for unwinding (would it
16969 help the debugger any if we did?), but record the change in
16970 the stack pointer. */
16971 if (dwarf2out_do_frame ())
16973 char *l = dwarf2out_cfi_label ();
16975 cfa_offset = cfa_offset + current_function_pretend_args_size;
16976 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
16980 /* Get the registers we are going to push. */
16981 live_regs_mask = thumb1_compute_save_reg_mask ();
16982 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
16983 l_mask = live_regs_mask & 0x40ff;
16984 /* Then count how many other high registers will need to be pushed. */
16985 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16987 if (TARGET_BACKTRACE)
16989 unsigned offset;
16990 unsigned work_register;
16992 /* We have been asked to create a stack backtrace structure.
16993 The code looks like this:
16995 0 .align 2
16996 0 func:
16997 0 sub SP, #16 Reserve space for 4 registers.
16998 2 push {R7} Push low registers.
16999 4 add R7, SP, #20 Get the stack pointer before the push.
17000 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
17001 8 mov R7, PC Get hold of the start of this code plus 12.
17002 10 str R7, [SP, #16] Store it.
17003 12 mov R7, FP Get hold of the current frame pointer.
17004 14 str R7, [SP, #4] Store it.
17005 16 mov R7, LR Get hold of the current return address.
17006 18 str R7, [SP, #12] Store it.
17007 20 add R7, SP, #16 Point at the start of the backtrace structure.
17008 22 mov FP, R7 Put this value into the frame pointer. */
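/* For illustration (field names ours, not GCC's): after the sequence
   above, the new frame pointer R7 addresses the top of a 16-byte
   record laid out as

	R7[0]	start of backtrace creation code + 12
	R7[-4]	return address (LR)
	R7[-8]	stack pointer on entry
	R7[-12]	caller's frame pointer

   matching the four str instructions in the sequence.  */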
17010 work_register = thumb_find_work_register (live_regs_mask);
17012 if (ARM_EABI_UNWIND_TABLES)
17013 asm_fprintf (f, "\t.pad #16\n");
17015 asm_fprintf
17016 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17017 SP_REGNUM, SP_REGNUM);
17019 if (dwarf2out_do_frame ())
17021 char *l = dwarf2out_cfi_label ();
17023 cfa_offset = cfa_offset + 16;
17024 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17027 if (l_mask)
17029 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17030 offset = bit_count (l_mask) * UNITS_PER_WORD;
17032 else
17033 offset = 0;
17035 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17036 offset + 16 + current_function_pretend_args_size);
17038 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17039 offset + 4);
17041 /* Make sure that the instruction fetching the PC is in the right place
17042 to calculate "start of backtrace creation code + 12". */
17043 if (l_mask)
17045 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17046 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17047 offset + 12);
17048 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17049 ARM_HARD_FRAME_POINTER_REGNUM);
17050 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17051 offset);
17053 else
17055 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17056 ARM_HARD_FRAME_POINTER_REGNUM);
17057 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17058 offset);
17059 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17060 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17061 offset + 12);
17064 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17065 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17066 offset + 8);
17067 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17068 offset + 12);
17069 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17070 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17072 /* Optimization: If we are not pushing any low registers but we are going
17073 to push some high registers then delay our first push. This will just
17074 be a push of LR and we can combine it with the push of the first high
17075 register. */
17076 else if ((l_mask & 0xff) != 0
17077 || (high_regs_pushed == 0 && l_mask))
17078 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17080 if (high_regs_pushed)
17082 unsigned pushable_regs;
17083 unsigned next_hi_reg;
17085 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17086 if (live_regs_mask & (1 << next_hi_reg))
17087 break;
17089 pushable_regs = l_mask & 0xff;
17091 if (pushable_regs == 0)
17092 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17094 while (high_regs_pushed > 0)
17096 unsigned long real_regs_mask = 0;
17098 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17100 if (pushable_regs & (1 << regno))
17102 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17104 high_regs_pushed --;
17105 real_regs_mask |= (1 << next_hi_reg);
17107 if (high_regs_pushed)
17109 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17110 next_hi_reg --)
17111 if (live_regs_mask & (1 << next_hi_reg))
17112 break;
17114 else
17116 pushable_regs &= ~((1 << regno) - 1);
17117 break;
17122 /* If we had to find a work register and we have not yet
17123 saved the LR then add it to the list of regs to push. */
17124 if (l_mask == (1 << LR_REGNUM))
17126 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17127 1, &cfa_offset,
17128 real_regs_mask | (1 << LR_REGNUM));
17129 l_mask = 0;
17131 else
17132 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17137 /* Handle the case of a double word load into a low register from
17138 a computed memory address. The computed address may involve a
17139 register which is overwritten by the load. */
17140 const char *
17141 thumb_load_double_from_address (rtx *operands)
17143 rtx addr;
17144 rtx base;
17145 rtx offset;
17146 rtx arg1;
17147 rtx arg2;
17149 gcc_assert (GET_CODE (operands[0]) == REG);
17150 gcc_assert (GET_CODE (operands[1]) == MEM);
17152 /* Get the memory address. */
17153 addr = XEXP (operands[1], 0);
17155 /* Work out how the memory address is computed. */
17156 switch (GET_CODE (addr))
17158 case REG:
17159 operands[2] = adjust_address (operands[1], SImode, 4);
17161 if (REGNO (operands[0]) == REGNO (addr))
17163 output_asm_insn ("ldr\t%H0, %2", operands);
17164 output_asm_insn ("ldr\t%0, %1", operands);
17166 else
17168 output_asm_insn ("ldr\t%0, %1", operands);
17169 output_asm_insn ("ldr\t%H0, %2", operands);
17171 break;
17173 case CONST:
17174 /* Compute <address> + 4 for the high order load. */
17175 operands[2] = adjust_address (operands[1], SImode, 4);
17177 output_asm_insn ("ldr\t%0, %1", operands);
17178 output_asm_insn ("ldr\t%H0, %2", operands);
17179 break;
17181 case PLUS:
17182 arg1 = XEXP (addr, 0);
17183 arg2 = XEXP (addr, 1);
17185 if (CONSTANT_P (arg1))
17186 base = arg2, offset = arg1;
17187 else
17188 base = arg1, offset = arg2;
17190 gcc_assert (GET_CODE (base) == REG);
17192 /* Catch the case of <address> = <reg> + <reg>. */
17193 if (GET_CODE (offset) == REG)
17195 int reg_offset = REGNO (offset);
17196 int reg_base = REGNO (base);
17197 int reg_dest = REGNO (operands[0]);
17199 /* Add the base and offset registers together into the
17200 higher destination register. */
17201 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17202 reg_dest + 1, reg_base, reg_offset);
17204 /* Load the lower destination register from the address in
17205 the higher destination register. */
17206 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17207 reg_dest, reg_dest + 1);
17209 /* Load the higher destination register from its own address
17210 plus 4. */
17211 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17212 reg_dest + 1, reg_dest + 1);
17214 else
17216 /* Compute <address> + 4 for the high order load. */
17217 operands[2] = adjust_address (operands[1], SImode, 4);
17219 /* If the computed address is held in the low order register
17220 then load the high order register first, otherwise always
17221 load the low order register first. */
17222 if (REGNO (operands[0]) == REGNO (base))
17224 output_asm_insn ("ldr\t%H0, %2", operands);
17225 output_asm_insn ("ldr\t%0, %1", operands);
17227 else
17229 output_asm_insn ("ldr\t%0, %1", operands);
17230 output_asm_insn ("ldr\t%H0, %2", operands);
17233 break;
17235 case LABEL_REF:
17236 /* With no registers to worry about we can just load the value
17237 directly. */
17238 operands[2] = adjust_address (operands[1], SImode, 4);
17240 output_asm_insn ("ldr\t%H0, %2", operands);
17241 output_asm_insn ("ldr\t%0, %1", operands);
17242 break;
17244 default:
17245 gcc_unreachable ();
17248 return "";
17251 const char *
17252 thumb_output_move_mem_multiple (int n, rtx *operands)
17254 rtx tmp;
17256 switch (n)
17258 case 2:
17259 if (REGNO (operands[4]) > REGNO (operands[5]))
17261 tmp = operands[4];
17262 operands[4] = operands[5];
17263 operands[5] = tmp;
17265 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17266 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17267 break;
17269 case 3:
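/* The compare-and-swap steps below sort operands[4..6] into ascending
   register order, as the ldmia/stmia register lists require.  */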
17270 if (REGNO (operands[4]) > REGNO (operands[5]))
17272 tmp = operands[4];
17273 operands[4] = operands[5];
17274 operands[5] = tmp;
17276 if (REGNO (operands[5]) > REGNO (operands[6]))
17278 tmp = operands[5];
17279 operands[5] = operands[6];
17280 operands[6] = tmp;
17282 if (REGNO (operands[4]) > REGNO (operands[5]))
17284 tmp = operands[4];
17285 operands[4] = operands[5];
17286 operands[5] = tmp;
17289 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17290 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17291 break;
17293 default:
17294 gcc_unreachable ();
17297 return "";
17300 /* Output a call-via instruction for thumb state. */
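/* A sketch of the resulting code (label number invented): a call
   through r3 in the text section becomes
	bl	.L42
   with the shared stub emitted once per register by arm_file_end:
   .L42:
	bx	r3
   */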
17301 const char *
17302 thumb_call_via_reg (rtx reg)
17304 int regno = REGNO (reg);
17305 rtx *labelp;
17307 gcc_assert (regno < LR_REGNUM);
17309 /* If we are in the normal text section we can use a single instance
17310 per compilation unit. If we are doing function sections, then we need
17311 an entry per section, since we can't rely on reachability. */
17312 if (in_section == text_section)
17314 thumb_call_reg_needed = 1;
17316 if (thumb_call_via_label[regno] == NULL)
17317 thumb_call_via_label[regno] = gen_label_rtx ();
17318 labelp = thumb_call_via_label + regno;
17320 else
17322 if (cfun->machine->call_via[regno] == NULL)
17323 cfun->machine->call_via[regno] = gen_label_rtx ();
17324 labelp = cfun->machine->call_via + regno;
17327 output_asm_insn ("bl\t%a0", labelp);
17328 return "";
17331 /* Routines for generating rtl. */
17332 void
17333 thumb_expand_movmemqi (rtx *operands)
17335 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17336 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17337 HOST_WIDE_INT len = INTVAL (operands[2]);
17338 HOST_WIDE_INT offset = 0;
17340 while (len >= 12)
17342 emit_insn (gen_movmem12b (out, in, out, in));
17343 len -= 12;
17346 if (len >= 8)
17348 emit_insn (gen_movmem8b (out, in, out, in));
17349 len -= 8;
17352 if (len >= 4)
17354 rtx reg = gen_reg_rtx (SImode);
17355 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17356 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17357 len -= 4;
17358 offset += 4;
17361 if (len >= 2)
17363 rtx reg = gen_reg_rtx (HImode);
17364 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17365 plus_constant (in, offset))));
17366 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17367 reg));
17368 len -= 2;
17369 offset += 2;
17372 if (len)
17374 rtx reg = gen_reg_rtx (QImode);
17375 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17376 plus_constant (in, offset))));
17377 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17378 reg));
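/* For instance, a 7-byte copy (a sketch) is emitted as one SImode
   load/store pair, then an HImode pair at offset 4, then a QImode pair
   at offset 6; 12- and 8-byte chunks instead use the ldmia/stmia-based
   movmem patterns, which post-increment IN and OUT themselves.  */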
17382 void
17383 thumb_reload_out_hi (rtx *operands)
17385 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17388 /* Handle reading a half-word from memory during reload. */
17389 void
17390 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17392 gcc_unreachable ();
17395 /* Return the length of a function name prefix
17396 that starts with the character C. */
17397 static int
17398 arm_get_strip_length (int c)
17400 switch (c)
17402 ARM_NAME_ENCODING_LENGTHS
17403 default: return 0;
17407 /* Return a pointer to a function's name with any
17408 and all prefix encodings stripped from it. */
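/* For example, assuming the '*' prefix handled by
   ARM_NAME_ENCODING_LENGTHS, a name such as "*foo" strips to "foo".  */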
17409 const char *
17410 arm_strip_name_encoding (const char *name)
17412 int skip;
17414 while ((skip = arm_get_strip_length (* name)))
17415 name += skip;
17417 return name;
17420 /* If there is a '*' anywhere in the name's prefix, then
17421 emit the stripped name verbatim, otherwise prepend an
17422 underscore if leading underscores are being used. */
17423 void
17424 arm_asm_output_labelref (FILE *stream, const char *name)
17426 int skip;
17427 int verbatim = 0;
17429 while ((skip = arm_get_strip_length (* name)))
17431 verbatim |= (*name == '*');
17432 name += skip;
17435 if (verbatim)
17436 fputs (name, stream);
17437 else
17438 asm_fprintf (stream, "%U%s", name);
17441 static void
17442 arm_file_start (void)
17444 int val;
17446 if (TARGET_UNIFIED_ASM)
17447 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17449 if (TARGET_BPABI)
17451 const char *fpu_name;
17452 if (arm_select[0].string)
17453 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17454 else if (arm_select[1].string)
17455 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17456 else
17457 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17458 all_cores[arm_default_cpu].name);
17460 if (TARGET_SOFT_FLOAT)
17462 if (TARGET_VFP)
17463 fpu_name = "softvfp";
17464 else
17465 fpu_name = "softfpa";
17467 else
17469 int set_float_abi_attributes = 0;
17470 switch (arm_fpu_arch)
17472 case FPUTYPE_FPA:
17473 fpu_name = "fpa";
17474 break;
17475 case FPUTYPE_FPA_EMU2:
17476 fpu_name = "fpe2";
17477 break;
17478 case FPUTYPE_FPA_EMU3:
17479 fpu_name = "fpe3";
17480 break;
17481 case FPUTYPE_MAVERICK:
17482 fpu_name = "maverick";
17483 break;
17484 case FPUTYPE_VFP:
17485 fpu_name = "vfp";
17486 set_float_abi_attributes = 1;
17487 break;
17488 case FPUTYPE_VFP3:
17489 fpu_name = "vfp3";
17490 set_float_abi_attributes = 1;
17491 break;
17492 case FPUTYPE_NEON:
17493 fpu_name = "neon";
17494 set_float_abi_attributes = 1;
17495 break;
17496 default:
17497 abort ();
17499 if (set_float_abi_attributes)
17501 if (TARGET_HARD_FLOAT)
17502 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17503 if (TARGET_HARD_FLOAT_ABI)
17504 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17507 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17509 /* Some of these attributes only apply when the corresponding features
17510 are used. However, we don't have any easy way of figuring this out.
17511 Conservatively record the setting that would have been used. */
17513 /* Tag_ABI_FP_rounding. */
17514 if (flag_rounding_math)
17515 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17516 if (!flag_unsafe_math_optimizations)
17518 /* Tag_ABI_FP_denormal. */
17519 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17520 /* Tag_ABI_FP_exceptions. */
17521 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17523 /* Tag_ABI_FP_user_exceptions. */
17524 if (flag_signaling_nans)
17525 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17526 /* Tag_ABI_FP_number_model. */
17527 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17528 flag_finite_math_only ? 1 : 3);
17530 /* Tag_ABI_align8_needed. */
17531 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17532 /* Tag_ABI_align8_preserved. */
17533 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17534 /* Tag_ABI_enum_size. */
17535 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17536 flag_short_enums ? 1 : 2);
17538 /* Tag_ABI_optimization_goals. */
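/* The values written below follow the Tag_ABI_optimization_goals
   encoding in the EABI attribute addenda: 1 = optimized for speed,
   2 = aggressively for speed, 4 = aggressively for size, and
   6 = best debugging experience.  */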
17539 if (optimize_size)
17540 val = 4;
17541 else if (optimize >= 2)
17542 val = 2;
17543 else if (optimize)
17544 val = 1;
17545 else
17546 val = 6;
17547 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
17549 if (arm_lang_output_object_attributes_hook)
17550 arm_lang_output_object_attributes_hook ();
17552 default_file_start ();
17555 static void
17556 arm_file_end (void)
17558 int regno;
17560 if (NEED_INDICATE_EXEC_STACK)
17561 /* Add .note.GNU-stack. */
17562 file_end_indicate_exec_stack ();
17564 if (! thumb_call_reg_needed)
17565 return;
17567 switch_to_section (text_section);
17568 asm_fprintf (asm_out_file, "\t.code 16\n");
17569 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17571 for (regno = 0; regno < LR_REGNUM; regno++)
17573 rtx label = thumb_call_via_label[regno];
17575 if (label != 0)
17577 targetm.asm_out.internal_label (asm_out_file, "L",
17578 CODE_LABEL_NUMBER (label));
17579 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17584 #ifndef ARM_PE
17585 /* Symbols in the text segment can be accessed without indirecting via the
17586 constant pool; it may take an extra binary operation, but this is still
17587 faster than indirecting via memory. Don't do this when not optimizing,
17588 since we won't be calculating all of the offsets necessary to do this
17589 simplification. */
17591 static void
17592 arm_encode_section_info (tree decl, rtx rtl, int first)
17594 if (optimize > 0 && TREE_CONSTANT (decl))
17595 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17597 default_encode_section_info (decl, rtl, first);
17599 #endif /* !ARM_PE */
17601 static void
17602 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17604 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17605 && !strcmp (prefix, "L"))
17607 arm_ccfsm_state = 0;
17608 arm_target_insn = NULL;
17610 default_internal_label (stream, prefix, labelno);
17613 /* Output code to add DELTA to the first argument, and then jump
17614 to FUNCTION. Used for C++ multiple inheritance. */
17615 static void
17616 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17617 HOST_WIDE_INT delta,
17618 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17619 tree function)
17621 static int thunk_label = 0;
17622 char label[256];
17623 char labelpc[256];
17624 int mi_delta = delta;
17625 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17626 int shift = 0;
17627 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17628 ? 1 : 0);
17629 if (mi_delta < 0)
17630 mi_delta = - mi_delta;
17631 /* When generating 16-bit thumb code, thunks are entered in arm mode. */
17632 if (TARGET_THUMB1)
17634 int labelno = thunk_label++;
17635 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17636 fputs ("\tldr\tr12, ", file);
17637 assemble_name (file, label);
17638 fputc ('\n', file);
17639 if (flag_pic)
17641 /* If we are generating PIC, the ldr instruction below loads
17642 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17643 the address of the add + 8, so we have:
17645 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17646 = target + 1.
17648 Note that we have "+ 1" because some versions of GNU ld
17649 don't set the low bit of the result for R_ARM_REL32
17650 relocations against thumb function symbols. */
17651 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17652 assemble_name (file, labelpc);
17653 fputs (":\n", file);
17654 fputs ("\tadd\tr12, pc, r12\n", file);
17657 /* TODO: Use movw/movt for large constants when available. */
17658 while (mi_delta != 0)
17660 if ((mi_delta & (3 << shift)) == 0)
17661 shift += 2;
17662 else
17664 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17665 mi_op, this_regno, this_regno,
17666 mi_delta & (0xff << shift));
17667 mi_delta &= ~(0xff << shift);
17668 shift += 8;
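/* Worked example (a sketch, assuming THIS is in r0 and mi_op is add):
   delta = 0x12345 is split into valid 8-bit-rotated immediates and
   emitted as
	add	r0, r0, #0x45
	add	r0, r0, #0x2300
	add	r0, r0, #0x10000
   */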
17671 if (TARGET_THUMB1)
17673 fprintf (file, "\tbx\tr12\n");
17674 ASM_OUTPUT_ALIGN (file, 2);
17675 assemble_name (file, label);
17676 fputs (":\n", file);
17677 if (flag_pic)
17679 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17680 rtx tem = XEXP (DECL_RTL (function), 0);
17681 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17682 tem = gen_rtx_MINUS (GET_MODE (tem),
17683 tem,
17684 gen_rtx_SYMBOL_REF (Pmode,
17685 ggc_strdup (labelpc)));
17686 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17688 else
17689 /* Output ".word .LTHUNKn". */
17690 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17692 else
17694 fputs ("\tb\t", file);
17695 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17696 if (NEED_PLT_RELOC)
17697 fputs ("(PLT)", file);
17698 fputc ('\n', file);
17702 int
17703 arm_emit_vector_const (FILE *file, rtx x)
17705 int i;
17706 const char * pattern;
17708 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17710 switch (GET_MODE (x))
17712 case V2SImode: pattern = "%08x"; break;
17713 case V4HImode: pattern = "%04x"; break;
17714 case V8QImode: pattern = "%02x"; break;
17715 default: gcc_unreachable ();
17718 fprintf (file, "0x");
17719 for (i = CONST_VECTOR_NUNITS (x); i--;)
17721 rtx element;
17723 element = CONST_VECTOR_ELT (x, i);
17724 fprintf (file, pattern, INTVAL (element));
17727 return 1;
17730 const char *
17731 arm_output_load_gr (rtx *operands)
17733 rtx reg;
17734 rtx offset;
17735 rtx wcgr;
17736 rtx sum;
17738 if (GET_CODE (operands [1]) != MEM
17739 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17740 || GET_CODE (reg = XEXP (sum, 0)) != REG
17741 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17742 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17743 return "wldrw%?\t%0, %1";
17745 /* Fix up an out-of-range load of a GR register. */
17746 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17747 wcgr = operands[0];
17748 operands[0] = reg;
17749 output_asm_insn ("ldr%?\t%0, %1", operands);
17751 operands[0] = wcgr;
17752 operands[1] = reg;
17753 output_asm_insn ("tmcr%?\t%0, %1", operands);
17754 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
17756 return "";
17759 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
17761 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
17762 named arg and all anonymous args onto the stack.
17763 XXX I know the prologue shouldn't be pushing registers, but it is faster
17764 that way. */
17766 static void
17767 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
17768 enum machine_mode mode ATTRIBUTE_UNUSED,
17769 tree type ATTRIBUTE_UNUSED,
17770 int *pretend_size,
17771 int second_time ATTRIBUTE_UNUSED)
17773 cfun->machine->uses_anonymous_args = 1;
17774 if (cum->nregs < NUM_ARG_REGS)
17775 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
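/* For example (a sketch), for a function declared as f (int x, ...)
   cum->nregs is 1 at this point, so *pretend_size becomes 12 and the
   prologue stores r1-r3, making the anonymous arguments contiguous
   with any arguments already on the stack.  */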
17778 /* Return nonzero if the CONSUMER instruction (a store) does not need
17779 PRODUCER's value to calculate the address. */
17781 int
17782 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
17784 rtx value = PATTERN (producer);
17785 rtx addr = PATTERN (consumer);
17787 if (GET_CODE (value) == COND_EXEC)
17788 value = COND_EXEC_CODE (value);
17789 if (GET_CODE (value) == PARALLEL)
17790 value = XVECEXP (value, 0, 0);
17791 value = XEXP (value, 0);
17792 if (GET_CODE (addr) == COND_EXEC)
17793 addr = COND_EXEC_CODE (addr);
17794 if (GET_CODE (addr) == PARALLEL)
17795 addr = XVECEXP (addr, 0, 0);
17796 addr = XEXP (addr, 0);
17798 return !reg_overlap_mentioned_p (value, addr);
17801 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
17802 have an early register shift value or amount dependency on the
17803 result of PRODUCER. */
17805 int
17806 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
17808 rtx value = PATTERN (producer);
17809 rtx op = PATTERN (consumer);
17810 rtx early_op;
17812 if (GET_CODE (value) == COND_EXEC)
17813 value = COND_EXEC_CODE (value);
17814 if (GET_CODE (value) == PARALLEL)
17815 value = XVECEXP (value, 0, 0);
17816 value = XEXP (value, 0);
17817 if (GET_CODE (op) == COND_EXEC)
17818 op = COND_EXEC_CODE (op);
17819 if (GET_CODE (op) == PARALLEL)
17820 op = XVECEXP (op, 0, 0);
17821 op = XEXP (op, 1);
17823 early_op = XEXP (op, 0);
17824 /* This is either an actual independent shift, or a shift applied to
17825 the first operand of another operation. We want the whole shift
17826 operation. */
17827 if (GET_CODE (early_op) == REG)
17828 early_op = op;
17830 return !reg_overlap_mentioned_p (value, early_op);
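/* Example (a sketch): if PRODUCER sets r1 and CONSUMER is
   add r0, r2, r1, lsl #2, then r1 feeds the shift and this returns 0;
   for add r0, r1, r2, lsl #2 the shifted operand is r2, so it
   returns nonzero.  */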
17833 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
17834 have an early register shift value dependency on the result of
17835 PRODUCER. */
17837 int
17838 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
17840 rtx value = PATTERN (producer);
17841 rtx op = PATTERN (consumer);
17842 rtx early_op;
17844 if (GET_CODE (value) == COND_EXEC)
17845 value = COND_EXEC_CODE (value);
17846 if (GET_CODE (value) == PARALLEL)
17847 value = XVECEXP (value, 0, 0);
17848 value = XEXP (value, 0);
17849 if (GET_CODE (op) == COND_EXEC)
17850 op = COND_EXEC_CODE (op);
17851 if (GET_CODE (op) == PARALLEL)
17852 op = XVECEXP (op, 0, 0);
17853 op = XEXP (op, 1);
17855 early_op = XEXP (op, 0);
17857 /* This is either an actual independent shift, or a shift applied to
17858 the first operand of another operation. We want the value being
17859 shifted, in either case. */
17860 if (GET_CODE (early_op) != REG)
17861 early_op = XEXP (early_op, 0);
17863 return !reg_overlap_mentioned_p (value, early_op);
17866 /* Return nonzero if the CONSUMER (a mul or mac op) does not
17867 have an early register mult dependency on the result of
17868 PRODUCER. */
17870 int
17871 arm_no_early_mul_dep (rtx producer, rtx consumer)
17873 rtx value = PATTERN (producer);
17874 rtx op = PATTERN (consumer);
17876 if (GET_CODE (value) == COND_EXEC)
17877 value = COND_EXEC_CODE (value);
17878 if (GET_CODE (value) == PARALLEL)
17879 value = XVECEXP (value, 0, 0);
17880 value = XEXP (value, 0);
17881 if (GET_CODE (op) == COND_EXEC)
17882 op = COND_EXEC_CODE (op);
17883 if (GET_CODE (op) == PARALLEL)
17884 op = XVECEXP (op, 0, 0);
17885 op = XEXP (op, 1);
17887 return (GET_CODE (op) == PLUS
17888 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
17891 /* We can't rely on the caller doing the proper promotion when
17892 using APCS or ATPCS. */
17894 static bool
17895 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
17897 return !TARGET_AAPCS_BASED;
17901 /* AAPCS based ABIs use short enums by default. */
17903 static bool
17904 arm_default_short_enums (void)
17906 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
17910 /* AAPCS requires that anonymous bitfields affect structure alignment. */
17912 static bool
17913 arm_align_anon_bitfield (void)
17915 return TARGET_AAPCS_BASED;
17919 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
17921 static tree
17922 arm_cxx_guard_type (void)
17924 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
17927 /* Return non-zero if the consumer (a multiply-accumulate instruction)
17928 has an accumulator dependency on the result of the producer (a
17929 multiplication instruction) and no other dependency on that result. */
17930 int
17931 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
17933 rtx mul = PATTERN (producer);
17934 rtx mac = PATTERN (consumer);
17935 rtx mul_result;
17936 rtx mac_op0, mac_op1, mac_acc;
17938 if (GET_CODE (mul) == COND_EXEC)
17939 mul = COND_EXEC_CODE (mul);
17940 if (GET_CODE (mac) == COND_EXEC)
17941 mac = COND_EXEC_CODE (mac);
17943 /* Check that mul is of the form (set (...) (mult ...))
17944 and mla is of the form (set (...) (plus (mult ...) (...))). */
17945 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
17946 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
17947 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
17948 return 0;
17950 mul_result = XEXP (mul, 0);
17951 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
17952 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
17953 mac_acc = XEXP (XEXP (mac, 1), 1);
17955 return (reg_overlap_mentioned_p (mul_result, mac_acc)
17956 && !reg_overlap_mentioned_p (mul_result, mac_op0)
17957 && !reg_overlap_mentioned_p (mul_result, mac_op1));
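/* Example (a sketch): mul r1, r2, r3 followed by mla r4, r5, r6, r1
   returns nonzero (r1 is only the accumulator); mla r4, r1, r6, r7
   would return 0 because r1 feeds a multiply operand.  */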
17961 /* The EABI says test the least significant bit of a guard variable. */
17963 static bool
17964 arm_cxx_guard_mask_bit (void)
17966 return TARGET_AAPCS_BASED;
17970 /* The EABI specifies that all array cookies are 8 bytes long. */
17972 static tree
17973 arm_get_cookie_size (tree type)
17975 tree size;
17977 if (!TARGET_AAPCS_BASED)
17978 return default_cxx_get_cookie_size (type);
17980 size = build_int_cst (sizetype, 8);
17981 return size;
17985 /* The EABI says that array cookies should also contain the element size. */
17987 static bool
17988 arm_cookie_has_size (void)
17990 return TARGET_AAPCS_BASED;
17994 /* The EABI says constructors and destructors should return a pointer to
17995 the object constructed/destroyed. */
17997 static bool
17998 arm_cxx_cdtor_returns_this (void)
18000 return TARGET_AAPCS_BASED;
18003 /* The EABI says that an inline function may never be the key
18004 method. */
18006 static bool
18007 arm_cxx_key_method_may_be_inline (void)
18009 return !TARGET_AAPCS_BASED;
18012 static void
18013 arm_cxx_determine_class_data_visibility (tree decl)
18015 if (!TARGET_AAPCS_BASED)
18016 return;
18018 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18019 is exported. However, on systems without dynamic vague linkage,
18020 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18021 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18022 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18023 else
18024 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18025 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18028 static bool
18029 arm_cxx_class_data_always_comdat (void)
18031 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18032 vague linkage if the class has no key function. */
18033 return !TARGET_AAPCS_BASED;
18037 /* The EABI says __aeabi_atexit should be used to register static
18038 destructors. */
18040 static bool
18041 arm_cxx_use_aeabi_atexit (void)
18043 return TARGET_AAPCS_BASED;
18047 void
18048 arm_set_return_address (rtx source, rtx scratch)
18050 arm_stack_offsets *offsets;
18051 HOST_WIDE_INT delta;
18052 rtx addr;
18053 unsigned long saved_regs;
18055 saved_regs = arm_compute_save_reg_mask ();
18057 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18058 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18059 else
18061 if (frame_pointer_needed)
18062 addr = plus_constant(hard_frame_pointer_rtx, -4);
18063 else
18065 /* LR will be the first saved register. */
18066 offsets = arm_get_frame_offsets ();
18067 delta = offsets->outgoing_args - (offsets->frame + 4);
18070 if (delta >= 4096)
18072 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18073 GEN_INT (delta & ~4095)));
18074 addr = scratch;
18075 delta &= 4095;
18077 else
18078 addr = stack_pointer_rtx;
18080 addr = plus_constant (addr, delta);
18082 emit_move_insn (gen_frame_mem (Pmode, addr), source);
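/* Sketch: with delta = 0x1008 this emits
	add	scratch, sp, #4096
	str	source, [scratch, #8]
   keeping the final offset in immediate range.  */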
18087 void
18088 thumb_set_return_address (rtx source, rtx scratch)
18090 arm_stack_offsets *offsets;
18091 HOST_WIDE_INT delta;
18092 HOST_WIDE_INT limit;
18093 int reg;
18094 rtx addr;
18095 unsigned long mask;
18097 emit_insn (gen_rtx_USE (VOIDmode, source));
18099 mask = thumb1_compute_save_reg_mask ();
18100 if (mask & (1 << LR_REGNUM))
18102 offsets = arm_get_frame_offsets ();
18104 limit = 1024;
18105 /* Find the saved regs. */
18106 if (frame_pointer_needed)
18108 delta = offsets->soft_frame - offsets->saved_args;
18109 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18110 if (TARGET_THUMB1)
18111 limit = 128;
18113 else
18115 delta = offsets->outgoing_args - offsets->saved_args;
18116 reg = SP_REGNUM;
18118 /* Allow for the stack frame. */
18119 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18120 delta -= 16;
18121 /* The link register is always the first saved register. */
18122 delta -= 4;
18124 /* Construct the address. */
18125 addr = gen_rtx_REG (SImode, reg);
18126 if (delta > limit)
18128 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18129 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18130 addr = scratch;
18132 else
18133 addr = plus_constant (addr, delta);
18135 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18137 else
18138 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18141 /* Implements target hook vector_mode_supported_p. */
18142 bool
18143 arm_vector_mode_supported_p (enum machine_mode mode)
18145 /* Neon also supports V2SImode, etc. listed in the clause below. */
18146 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18147 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18148 return true;
18150 if ((mode == V2SImode)
18151 || (mode == V4HImode)
18152 || (mode == V8QImode))
18153 return true;
18155 return false;
18158 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18159 ARM insns and therefore guarantee that the shift count is modulo 256.
18160 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18161 guarantee no particular behavior for out-of-range counts. */
18163 static unsigned HOST_WIDE_INT
18164 arm_shift_truncation_mask (enum machine_mode mode)
18166 return mode == SImode ? 255 : 0;
18170 /* Map internal gcc register numbers to DWARF2 register numbers. */
18172 unsigned int
18173 arm_dbx_register_number (unsigned int regno)
18175 if (regno < 16)
18176 return regno;
18178 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18179 compatibility. The EABI defines them as registers 96-103. */
18180 if (IS_FPA_REGNUM (regno))
18181 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18183 /* FIXME: VFPv3 register numbering. */
18184 if (IS_VFP_REGNUM (regno))
18185 return 64 + regno - FIRST_VFP_REGNUM;
18187 if (IS_IWMMXT_GR_REGNUM (regno))
18188 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18190 if (IS_IWMMXT_REGNUM (regno))
18191 return 112 + regno - FIRST_IWMMXT_REGNUM;
18193 gcc_unreachable ();
18197 #ifdef TARGET_UNWIND_INFO
18198 /* Emit unwind directives for a store-multiple instruction or stack pointer
18199 push during alignment.
18200 These should only ever be generated by the function prologue code, so
18201 expect them to have a particular form. */
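/* For instance (a sketch), a prologue store-multiple of the form
	(parallel [(set sp (plus sp (const_int -12)))
		   (set (mem sp) r4)
		   (set (mem (plus sp (const_int 4))) r5)
		   (set (mem (plus sp (const_int 8))) lr)])
   produces the directive
	.save	{r4, r5, lr}
   */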
18203 static void
18204 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18206 int i;
18207 HOST_WIDE_INT offset;
18208 HOST_WIDE_INT nregs;
18209 int reg_size;
18210 unsigned reg;
18211 unsigned lastreg;
18212 rtx e;
18214 e = XVECEXP (p, 0, 0);
18215 if (GET_CODE (e) != SET)
18216 abort ();
18218 /* First insn will adjust the stack pointer. */
18219 if (GET_CODE (e) != SET
18220 || GET_CODE (XEXP (e, 0)) != REG
18221 || REGNO (XEXP (e, 0)) != SP_REGNUM
18222 || GET_CODE (XEXP (e, 1)) != PLUS)
18223 abort ();
18225 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18226 nregs = XVECLEN (p, 0) - 1;
18228 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18229 if (reg < 16)
18231 /* The function prologue may also push pc, but does not annotate it,
18232 as it is never restored. We turn this into a stack pointer adjustment. */
18233 if (nregs * 4 == offset - 4)
18235 fprintf (asm_out_file, "\t.pad #4\n");
18236 offset -= 4;
18238 reg_size = 4;
18239 fprintf (asm_out_file, "\t.save {");
18241 else if (IS_VFP_REGNUM (reg))
18243 reg_size = 8;
18244 fprintf (asm_out_file, "\t.vsave {");
18246 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18248 /* FPA registers are done differently. */
18249 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18250 return;
18252 else
18253 /* Unknown register type. */
18254 abort ();
18256 /* If the stack increment doesn't match the size of the saved registers,
18257 something has gone horribly wrong. */
18258 if (offset != nregs * reg_size)
18259 abort ();
18261 offset = 0;
18262 lastreg = 0;
18263 /* The remaining insns will describe the stores. */
18264 for (i = 1; i <= nregs; i++)
18266 /* Expect (set (mem <addr>) (reg)).
18267 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18268 e = XVECEXP (p, 0, i);
18269 if (GET_CODE (e) != SET
18270 || GET_CODE (XEXP (e, 0)) != MEM
18271 || GET_CODE (XEXP (e, 1)) != REG)
18272 abort ();
18274 reg = REGNO (XEXP (e, 1));
18275 if (reg < lastreg)
18276 abort ();
18278 if (i != 1)
18279 fprintf (asm_out_file, ", ");
18280 /* We can't use %r for vfp because we need to use the
18281 double precision register names. */
18282 if (IS_VFP_REGNUM (reg))
18283 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18284 else
18285 asm_fprintf (asm_out_file, "%r", reg);
18287 #ifdef ENABLE_CHECKING
18288 /* Check that the addresses are consecutive. */
18289 e = XEXP (XEXP (e, 0), 0);
18290 if (GET_CODE (e) == PLUS)
18292 offset += reg_size;
18293 if (GET_CODE (XEXP (e, 0)) != REG
18294 || REGNO (XEXP (e, 0)) != SP_REGNUM
18295 || GET_CODE (XEXP (e, 1)) != CONST_INT
18296 || offset != INTVAL (XEXP (e, 1)))
18297 abort ();
18299 else if (i != 1
18300 || GET_CODE (e) != REG
18301 || REGNO (e) != SP_REGNUM)
18302 abort ();
18303 #endif
18305 fprintf (asm_out_file, "}\n");
18308 /* Emit unwind directives for a SET. */
18310 static void
18311 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18313 rtx e0;
18314 rtx e1;
18315 unsigned reg;
18317 e0 = XEXP (p, 0);
18318 e1 = XEXP (p, 1);
18319 switch (GET_CODE (e0))
18321 case MEM:
18322 /* Pushing a single register. */
18323 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18324 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18325 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18326 abort ();
18328 asm_fprintf (asm_out_file, "\t.save ");
18329 if (IS_VFP_REGNUM (REGNO (e1)))
18330 asm_fprintf (asm_out_file, "{d%d}\n",
18331 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18332 else
18333 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
18334 break;
18336 case REG:
18337 if (REGNO (e0) == SP_REGNUM)
18339 /* A stack increment. */
18340 if (GET_CODE (e1) != PLUS
18341 || GET_CODE (XEXP (e1, 0)) != REG
18342 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18343 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18344 abort ();
18346 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18347 -INTVAL (XEXP (e1, 1)));
18349 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18351 HOST_WIDE_INT offset;
18353 if (GET_CODE (e1) == PLUS)
18355 if (GET_CODE (XEXP (e1, 0)) != REG
18356 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18357 abort ();
18358 reg = REGNO (XEXP (e1, 0));
18359 offset = INTVAL (XEXP (e1, 1));
18360 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18361 HARD_FRAME_POINTER_REGNUM, reg,
18362 offset);
18364 else if (GET_CODE (e1) == REG)
18366 reg = REGNO (e1);
18367 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18368 HARD_FRAME_POINTER_REGNUM, reg);
18370 else
18371 abort ();
18373 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18375 /* Move from sp to reg. */
18376 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18378 else if (GET_CODE (e1) == PLUS
18379 && GET_CODE (XEXP (e1, 0)) == REG
18380 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18381 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18383 /* Set reg to offset from sp. */
18384 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18385 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
18387 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18389 /* Stack pointer save before alignment. */
18390 reg = REGNO (e0);
18391 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18392 reg + 0x90, reg);
18394 else
18395 abort ();
18396 break;
18398 default:
18399 abort ();
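/* Sketches of the mappings handled above:
	push  {r6}		->  .save {r6}
	sub   sp, sp, #16	->  .pad #16
	add   fp, sp, #8	->  .setfp fp, sp, #8
	mov   r7, sp		->  .movsp r7
   */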
18404 /* Emit unwind directives for the given insn. */
18406 static void
18407 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18409 rtx pat;
18411 if (!ARM_EABI_UNWIND_TABLES)
18412 return;
18414 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18415 return;
18417 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18418 if (pat)
18419 pat = XEXP (pat, 0);
18420 else
18421 pat = PATTERN (insn);
18423 switch (GET_CODE (pat))
18425 case SET:
18426 arm_unwind_emit_set (asm_out_file, pat);
18427 break;
18429 case SEQUENCE:
18430 /* Store multiple. */
18431 arm_unwind_emit_sequence (asm_out_file, pat);
18432 break;
18434 default:
18435 abort ();
18440 /* Output a reference from a function exception table to the type_info
18441 object X. The EABI specifies that the symbol should be relocated by
18442 an R_ARM_TARGET2 relocation. */
18444 static bool
18445 arm_output_ttype (rtx x)
18447 fputs ("\t.word\t", asm_out_file);
18448 output_addr_const (asm_out_file, x);
18449 /* Use special relocations for symbol references. */
18450 if (GET_CODE (x) != CONST_INT)
18451 fputs ("(TARGET2)", asm_out_file);
18452 fputc ('\n', asm_out_file);
18454 return TRUE;
18456 #endif /* TARGET_UNWIND_INFO */
18459 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18460 stack alignment. */
18462 static void
18463 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18465 rtx unspec = SET_SRC (pattern);
18466 gcc_assert (GET_CODE (unspec) == UNSPEC);
18468 switch (index)
18470 case UNSPEC_STACK_ALIGN:
18471 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18472 put anything on the stack, so hopefully it won't matter.
18473 CFA = SP will be correct after alignment. */
18474 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18475 SET_DEST (pattern));
18476 break;
18477 default:
18478 gcc_unreachable ();
18483 /* Output unwind directives for the start/end of a function. */
18485 void
18486 arm_output_fn_unwind (FILE * f, bool prologue)
18488 if (!ARM_EABI_UNWIND_TABLES)
18489 return;
18491 if (prologue)
18492 fputs ("\t.fnstart\n", f);
18493 else
18494 fputs ("\t.fnend\n", f);
18497 static bool
18498 arm_emit_tls_decoration (FILE *fp, rtx x)
18500 enum tls_reloc reloc;
18501 rtx val;
18503 val = XVECEXP (x, 0, 0);
18504 reloc = INTVAL (XVECEXP (x, 0, 1));
18506 output_addr_const (fp, val);
18508 switch (reloc)
18510 case TLS_GD32:
18511 fputs ("(tlsgd)", fp);
18512 break;
18513 case TLS_LDM32:
18514 fputs ("(tlsldm)", fp);
18515 break;
18516 case TLS_LDO32:
18517 fputs ("(tlsldo)", fp);
18518 break;
18519 case TLS_IE32:
18520 fputs ("(gottpoff)", fp);
18521 break;
18522 case TLS_LE32:
18523 fputs ("(tpoff)", fp);
18524 break;
18525 default:
18526 gcc_unreachable ();
18529 switch (reloc)
18531 case TLS_GD32:
18532 case TLS_LDM32:
18533 case TLS_IE32:
18534 fputs (" + (. - ", fp);
18535 output_addr_const (fp, XVECEXP (x, 0, 2));
18536 fputs (" - ", fp);
18537 output_addr_const (fp, XVECEXP (x, 0, 3));
18538 fputc (')', fp);
18539 break;
18540 default:
18541 break;
18544 return TRUE;
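/* For example (label names invented), a TLS_GD32 reference to symbol x
   is decorated as
	x(tlsgd) + (. - .LPIC4 - .LPIC8)
   */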
18547 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18549 static void
18550 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18552 gcc_assert (size == 4);
18553 fputs ("\t.word\t", file);
18554 output_addr_const (file, x);
18555 fputs ("(tlsldo)", file);
18558 bool
18559 arm_output_addr_const_extra (FILE *fp, rtx x)
18561 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18562 return arm_emit_tls_decoration (fp, x);
18563 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18565 char label[256];
18566 int labelno = INTVAL (XVECEXP (x, 0, 0));
18568 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18569 assemble_name_raw (fp, label);
18571 return TRUE;
18573 else if (GET_CODE (x) == CONST_VECTOR)
18574 return arm_emit_vector_const (fp, x);
18576 return FALSE;
18579 /* Output assembly for a shift instruction.
18580 SET_FLAGS determines how the instruction modifies the condition codes.
18581 0 - Do not set condition codes.
18582 1 - Set condition codes.
18583 2 - Use smallest instruction. */
18584 const char *
18585 arm_output_shift (rtx *operands, int set_flags)
18587 char pattern[100];
18588 static const char flag_chars[3] = {'?', '.', '!'};
18589 const char *shift;
18590 HOST_WIDE_INT val;
18591 char c;
18593 c = flag_chars[set_flags];
18594 if (TARGET_UNIFIED_ASM)
18596 shift = shift_op (operands[3], &val);
18597 if (shift)
18599 if (val != -1)
18600 operands[2] = GEN_INT (val);
18601 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18603 else
18604 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18606 else
18607 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18608 output_asm_insn (pattern, operands);
18609 return "";
18612 /* Output a Thumb-2 casesi instruction. */
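/* Note: tbb/tbh branch forward by twice the (byte or halfword) table
   entry; the lsl #1 below scales the index for halfword-sized entries,
   while SImode tables fall back to an explicit load of the target
   address.  */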
18613 const char *
18614 thumb2_output_casesi (rtx *operands)
18616 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18618 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18620 output_asm_insn ("cmp\t%0, %1", operands);
18621 output_asm_insn ("bhi\t%l3", operands);
18622 switch (GET_MODE (diff_vec))
18624 case QImode:
18625 return "tbb\t[%|pc, %0]";
18626 case HImode:
18627 return "tbh\t[%|pc, %0, lsl #1]";
18628 case SImode:
18629 if (flag_pic)
18631 output_asm_insn ("adr\t%4, %l2", operands);
18632 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18633 output_asm_insn ("add\t%4, %4, %5", operands);
18634 return "bx\t%4";
18636 else
18638 output_asm_insn ("adr\t%4, %l2", operands);
18639 return "ldr\t%|pc, [%4, %0, lsl #2]";
18641 default:
18642 gcc_unreachable ();
18646 /* A table and a function to perform ARM-specific name mangling for
18647 NEON vector types in order to conform to the AAPCS (see "Procedure
18648 Call Standard for the ARM Architecture", Appendix A). To qualify
18649 for emission with the mangled names defined in that document, a
18650 vector type must not only be of the correct mode but also be
18651 composed of NEON vector element types (e.g. __builtin_neon_qi). */
18652 typedef struct
18654 enum machine_mode mode;
18655 const char *element_type_name;
18656 const char *aapcs_name;
18657 } arm_mangle_map_entry;
18659 static arm_mangle_map_entry arm_mangle_map[] = {
18660 /* 64-bit containerized types. */
18661 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18662 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18663 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18664 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18665 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18666 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18667 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18668 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18669 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18670 /* 128-bit containerized types. */
18671 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18672 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18673 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18674 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18675 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18676 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18677 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18678 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18679 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18680 { VOIDmode, NULL, NULL }
18683 const char *
18684 arm_mangle_type (const_tree type)
18686 arm_mangle_map_entry *pos = arm_mangle_map;
18688 if (TREE_CODE (type) != VECTOR_TYPE)
18689 return NULL;
18691 /* Check the mode of the vector type, and the name of the vector
18692 element type, against the table. */
18693 while (pos->mode != VOIDmode)
18695 tree elt_type = TREE_TYPE (type);
18697 if (pos->mode == TYPE_MODE (type)
18698 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18699 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18700 pos->element_type_name))
18701 return pos->aapcs_name;
18703 pos++;
18706 /* Use the default mangling for unrecognized (possibly user-defined)
18707 vector types. */
18708 return NULL;
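/* Example: int8x8_t is a V8QImode vector of __builtin_neon_qi, so by
   the table above a parameter of that type mangles as
   15__simd64_int8_t (e.g. f (int8x8_t) becomes _Z1f15__simd64_int8_t).  */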
18711 #include "gt-arm.h"