1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 3, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "obstack.h"
31 #include "regs.h"
32 #include "hard-reg-set.h"
33 #include "real.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "flags.h"
39 #include "reload.h"
40 #include "function.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "recog.h"
45 #include "ggc.h"
46 #include "except.h"
47 #include "c-pragma.h"
48 #include "integrate.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "debug.h"
53 #include "langhooks.h"
54 #include "df.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 /* Forward function declarations. */
63 static arm_stack_offsets *arm_get_frame_offsets (void);
64 static void arm_add_gc_roots (void);
65 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
66 HOST_WIDE_INT, rtx, rtx, int, int);
67 static unsigned bit_count (unsigned long);
68 static int arm_address_register_rtx_p (rtx, int);
69 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
70 static int thumb2_legitimate_index_p (enum machine_mode, rtx, int);
71 static int thumb1_base_register_rtx_p (rtx, enum machine_mode, int);
72 inline static int thumb1_index_register_rtx_p (rtx, int);
73 static int thumb_far_jump_used_p (void);
74 static bool thumb_force_lr_save (void);
75 static unsigned long thumb1_compute_save_reg_mask (void);
76 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
77 static rtx emit_sfm (int, int);
78 static int arm_size_return_regs (void);
79 static bool arm_assemble_integer (rtx, unsigned int, int);
80 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
81 static arm_cc get_arm_condition_code (rtx);
82 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
83 static rtx is_jump_table (rtx);
84 static const char *output_multi_immediate (rtx *, const char *, const char *,
85 int, HOST_WIDE_INT);
86 static const char *shift_op (rtx, HOST_WIDE_INT *);
87 static struct machine_function *arm_init_machine_status (void);
88 static void thumb_exit (FILE *, int);
89 static rtx is_jump_table (rtx);
90 static HOST_WIDE_INT get_jump_table_size (rtx);
91 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
92 static Mnode *add_minipool_forward_ref (Mfix *);
93 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
94 static Mnode *add_minipool_backward_ref (Mfix *);
95 static void assign_minipool_offsets (Mfix *);
96 static void arm_print_value (FILE *, rtx);
97 static void dump_minipool (rtx);
98 static int arm_barrier_cost (rtx);
99 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
100 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
101 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
102 rtx);
103 static void arm_reorg (void);
104 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
105 static unsigned long arm_compute_save_reg0_reg12_mask (void);
106 static unsigned long arm_compute_save_reg_mask (void);
107 static unsigned long arm_isr_value (tree);
108 static unsigned long arm_compute_func_type (void);
109 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
110 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
111 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
112 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
113 #endif
114 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
115 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
116 static void thumb1_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static int arm_comp_type_attributes (const_tree, const_tree);
118 static void arm_set_default_type_attributes (tree);
119 static int arm_adjust_cost (rtx, rtx, rtx, int);
120 static int count_insns_for_constant (HOST_WIDE_INT, int);
121 static int arm_get_strip_length (int);
122 static bool arm_function_ok_for_sibcall (tree, tree);
123 static void arm_internal_label (FILE *, const char *, unsigned long);
124 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
125 tree);
126 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
127 static bool arm_size_rtx_costs (rtx, int, int, int *);
128 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
129 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
131 static bool arm_9e_rtx_costs (rtx, int, int, int *);
132 static int arm_address_cost (rtx);
133 static bool arm_memory_load_p (rtx);
134 static bool arm_cirrus_insn_p (rtx);
135 static void cirrus_reorg (rtx);
136 static void arm_init_builtins (void);
137 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
138 static void arm_init_iwmmxt_builtins (void);
139 static rtx safe_vector_operand (rtx, enum machine_mode);
140 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
141 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
142 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
143 static void emit_constant_insn (rtx cond, rtx pattern);
144 static rtx emit_set_insn (rtx, rtx);
145 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
146 tree, bool);
148 #ifdef OBJECT_FORMAT_ELF
149 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
150 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
151 #endif
152 #ifndef ARM_PE
153 static void arm_encode_section_info (tree, rtx, int);
154 #endif
156 static void arm_file_end (void);
157 static void arm_file_start (void);
159 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
160 tree, int *, int);
161 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
162 enum machine_mode, const_tree, bool);
163 static bool arm_promote_prototypes (const_tree);
164 static bool arm_default_short_enums (void);
165 static bool arm_align_anon_bitfield (void);
166 static bool arm_return_in_msb (const_tree);
167 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
168 #ifdef TARGET_UNWIND_INFO
169 static void arm_unwind_emit (FILE *, rtx);
170 static bool arm_output_ttype (rtx);
171 #endif
172 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
174 static tree arm_cxx_guard_type (void);
175 static bool arm_cxx_guard_mask_bit (void);
176 static tree arm_get_cookie_size (tree);
177 static bool arm_cookie_has_size (void);
178 static bool arm_cxx_cdtor_returns_this (void);
179 static bool arm_cxx_key_method_may_be_inline (void);
180 static void arm_cxx_determine_class_data_visibility (tree);
181 static bool arm_cxx_class_data_always_comdat (void);
182 static bool arm_cxx_use_aeabi_atexit (void);
183 static void arm_init_libfuncs (void);
184 static bool arm_handle_option (size_t, const char *, int);
185 static void arm_target_help (void);
186 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
187 static bool arm_cannot_copy_insn_p (rtx);
188 static bool arm_tls_symbol_p (rtx x);
189 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
192 /* Initialize the GCC target structure. */
193 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
194 #undef TARGET_MERGE_DECL_ATTRIBUTES
195 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
196 #endif
198 #undef TARGET_ATTRIBUTE_TABLE
199 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
201 #undef TARGET_ASM_FILE_START
202 #define TARGET_ASM_FILE_START arm_file_start
203 #undef TARGET_ASM_FILE_END
204 #define TARGET_ASM_FILE_END arm_file_end
206 #undef TARGET_ASM_ALIGNED_SI_OP
207 #define TARGET_ASM_ALIGNED_SI_OP NULL
208 #undef TARGET_ASM_INTEGER
209 #define TARGET_ASM_INTEGER arm_assemble_integer
211 #undef TARGET_ASM_FUNCTION_PROLOGUE
212 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
214 #undef TARGET_ASM_FUNCTION_EPILOGUE
215 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
217 #undef TARGET_DEFAULT_TARGET_FLAGS
218 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
219 #undef TARGET_HANDLE_OPTION
220 #define TARGET_HANDLE_OPTION arm_handle_option
221 #undef TARGET_HELP
222 #define TARGET_HELP arm_target_help
224 #undef TARGET_COMP_TYPE_ATTRIBUTES
225 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
227 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
228 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
230 #undef TARGET_SCHED_ADJUST_COST
231 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
233 #undef TARGET_ENCODE_SECTION_INFO
234 #ifdef ARM_PE
235 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
236 #else
237 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
238 #endif
240 #undef TARGET_STRIP_NAME_ENCODING
241 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
243 #undef TARGET_ASM_INTERNAL_LABEL
244 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
246 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
247 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
249 #undef TARGET_ASM_OUTPUT_MI_THUNK
250 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
251 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
252 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
254 /* This will be overridden in arm_override_options. */
255 #undef TARGET_RTX_COSTS
256 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
257 #undef TARGET_ADDRESS_COST
258 #define TARGET_ADDRESS_COST arm_address_cost
260 #undef TARGET_SHIFT_TRUNCATION_MASK
261 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
262 #undef TARGET_VECTOR_MODE_SUPPORTED_P
263 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
265 #undef TARGET_MACHINE_DEPENDENT_REORG
266 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
268 #undef TARGET_INIT_BUILTINS
269 #define TARGET_INIT_BUILTINS arm_init_builtins
270 #undef TARGET_EXPAND_BUILTIN
271 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
273 #undef TARGET_INIT_LIBFUNCS
274 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
276 #undef TARGET_PROMOTE_FUNCTION_ARGS
277 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
278 #undef TARGET_PROMOTE_FUNCTION_RETURN
279 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
280 #undef TARGET_PROMOTE_PROTOTYPES
281 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
282 #undef TARGET_PASS_BY_REFERENCE
283 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
284 #undef TARGET_ARG_PARTIAL_BYTES
285 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
287 #undef TARGET_SETUP_INCOMING_VARARGS
288 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
290 #undef TARGET_DEFAULT_SHORT_ENUMS
291 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
293 #undef TARGET_ALIGN_ANON_BITFIELD
294 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
296 #undef TARGET_NARROW_VOLATILE_BITFIELD
297 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
299 #undef TARGET_CXX_GUARD_TYPE
300 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
302 #undef TARGET_CXX_GUARD_MASK_BIT
303 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
305 #undef TARGET_CXX_GET_COOKIE_SIZE
306 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
308 #undef TARGET_CXX_COOKIE_HAS_SIZE
309 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
311 #undef TARGET_CXX_CDTOR_RETURNS_THIS
312 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
314 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
315 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
317 #undef TARGET_CXX_USE_AEABI_ATEXIT
318 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
320 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
321 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
322 arm_cxx_determine_class_data_visibility
324 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
325 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
327 #undef TARGET_RETURN_IN_MSB
328 #define TARGET_RETURN_IN_MSB arm_return_in_msb
330 #undef TARGET_MUST_PASS_IN_STACK
331 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
333 #ifdef TARGET_UNWIND_INFO
334 #undef TARGET_UNWIND_EMIT
335 #define TARGET_UNWIND_EMIT arm_unwind_emit
337 /* EABI unwinding tables use a different format for the typeinfo tables. */
338 #undef TARGET_ASM_TTYPE
339 #define TARGET_ASM_TTYPE arm_output_ttype
341 #undef TARGET_ARM_EABI_UNWINDER
342 #define TARGET_ARM_EABI_UNWINDER true
343 #endif /* TARGET_UNWIND_INFO */
345 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
346 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
348 #undef TARGET_CANNOT_COPY_INSN_P
349 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
351 #ifdef HAVE_AS_TLS
352 #undef TARGET_HAVE_TLS
353 #define TARGET_HAVE_TLS true
354 #endif
356 #undef TARGET_CANNOT_FORCE_CONST_MEM
357 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
359 #undef TARGET_MANGLE_TYPE
360 #define TARGET_MANGLE_TYPE arm_mangle_type
362 #ifdef HAVE_AS_TLS
363 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
364 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
365 #endif
367 struct gcc_target targetm = TARGET_INITIALIZER;
369 /* Obstack for minipool constant handling. */
370 static struct obstack minipool_obstack;
371 static char * minipool_startobj;
373 /* The maximum number of insns skipped which
374 will be conditionalised if possible. */
375 static int max_insns_skipped = 5;
377 extern FILE * asm_out_file;
379 /* True if we are currently building a constant table. */
380 int making_const_table;
382 /* Define the information needed to generate branch insns. This is
383 stored from the compare operation. */
384 rtx arm_compare_op0, arm_compare_op1;
386 /* The processor for which instructions should be scheduled. */
387 enum processor_type arm_tune = arm_none;
389 /* The default processor used if not overridden by commandline. */
390 static enum processor_type arm_default_cpu = arm_none;
392 /* Which floating point model to use. */
393 enum arm_fp_model arm_fp_model;
395 /* Which floating point hardware is available. */
396 enum fputype arm_fpu_arch;
398 /* Which floating point hardware to schedule for. */
399 enum fputype arm_fpu_tune;
401 /* Whether to use floating point hardware. */
402 enum float_abi_type arm_float_abi;
404 /* Which ABI to use. */
405 enum arm_abi_type arm_abi;
407 /* Which thread pointer model to use. */
408 enum arm_tp_type target_thread_pointer = TP_AUTO;
410 /* Used to parse -mstructure_size_boundary command line option. */
411 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
413 /* Used for Thumb call_via trampolines. */
414 rtx thumb_call_via_label[14];
415 static int thumb_call_reg_needed;
417 /* Bit values used to identify processor capabilities. */
418 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
419 #define FL_ARCH3M (1 << 1) /* Extended multiply */
420 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
421 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
422 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
423 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
424 #define FL_THUMB (1 << 6) /* Thumb aware */
425 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
426 #define FL_STRONG (1 << 8) /* StrongARM */
427 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
428 #define FL_XSCALE (1 << 10) /* XScale */
429 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
430 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
431 media instructions. */
432 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
433 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
434 Note: ARM6 & 7 derivatives only. */
435 #define FL_ARCH6K (1 << 15) /* Architecture rel 6 K extensions. */
436 #define FL_THUMB2 (1 << 16) /* Thumb-2. */
437 #define FL_NOTM (1 << 17) /* Instructions not present in the 'M'
438 profile. */
439 #define FL_DIV (1 << 18) /* Hardware divide. */
440 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
441 #define FL_NEON (1 << 20) /* Neon instructions. */
443 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
445 #define FL_FOR_ARCH2 FL_NOTM
446 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
447 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
448 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
449 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
450 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
451 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
452 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
453 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
454 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
455 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
456 #define FL_FOR_ARCH6J FL_FOR_ARCH6
457 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
458 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
459 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
460 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
461 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
462 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM)
463 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
464 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
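/* Worked example of how these compose: FL_FOR_ARCH5TE is
   FL_FOR_ARCH5E | FL_THUMB, which unwinds to
   FL_NOTM | FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E | FL_THUMB.
   FL_FOR_ARCH7 strips FL_NOTM from the v6T2 set to describe the common
   v7 subset; FL_FOR_ARCH7A adds it back, while the 7R and 7M variants
   also add FL_DIV.  */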
466 /* The bits in this mask specify which
467 instructions we are allowed to generate. */
468 static unsigned long insn_flags = 0;
470 /* The bits in this mask specify which instruction scheduling options should
471 be used. */
472 static unsigned long tune_flags = 0;
474 /* The following are used in the arm.md file as equivalents to bits
475 in the above two flag variables. */
477 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
478 int arm_arch3m = 0;
480 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
481 int arm_arch4 = 0;
483 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
484 int arm_arch4t = 0;
486 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
487 int arm_arch5 = 0;
489 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
490 int arm_arch5e = 0;
492 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
493 int arm_arch6 = 0;
495 /* Nonzero if this chip supports the ARM 6K extensions. */
496 int arm_arch6k = 0;
498 /* Nonzero if instructions not present in the 'M' profile can be used. */
499 int arm_arch_notm = 0;
501 /* Nonzero if this chip can benefit from load scheduling. */
502 int arm_ld_sched = 0;
504 /* Nonzero if this chip is a StrongARM. */
505 int arm_tune_strongarm = 0;
507 /* Nonzero if this chip is a Cirrus variant. */
508 int arm_arch_cirrus = 0;
510 /* Nonzero if this chip supports Intel Wireless MMX technology. */
511 int arm_arch_iwmmxt = 0;
513 /* Nonzero if this chip is an XScale. */
514 int arm_arch_xscale = 0;
516 /* Nonzero if tuning for XScale */
517 int arm_tune_xscale = 0;
519 /* Nonzero if we want to tune for stores that access the write-buffer.
520 This typically means an ARM6 or ARM7 with MMU or MPU. */
521 int arm_tune_wbuf = 0;
523 /* Nonzero if generating Thumb instructions. */
524 int thumb_code = 0;
526 /* Nonzero if we should define __THUMB_INTERWORK__ in the
527 preprocessor.
528 XXX This is a bit of a hack, it's intended to help work around
529 problems in GLD which doesn't understand that armv5t code is
530 interworking clean. */
531 int arm_cpp_interwork = 0;
533 /* Nonzero if chip supports Thumb 2. */
534 int arm_arch_thumb2;
536 /* Nonzero if chip supports integer division instruction. */
537 int arm_arch_hwdiv;
539 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
540 must report the mode of the memory reference from PRINT_OPERAND to
541 PRINT_OPERAND_ADDRESS. */
542 enum machine_mode output_memory_reference_mode;
544 /* The register number to be used for the PIC offset register. */
545 unsigned arm_pic_register = INVALID_REGNUM;
547 /* Set to 1 when a return insn is output, this means that the epilogue
548 is not needed. */
549 int return_used_this_function;
551 /* Set to 1 after arm_reorg has started. Reset to start at the start of
552 the next function. */
553 static int after_arm_reorg = 0;
555 /* The maximum number of insns to be used when loading a constant. */
556 static int arm_constant_limit = 3;
558 /* For an explanation of these variables, see final_prescan_insn below. */
559 int arm_ccfsm_state;
560 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
561 enum arm_cond_code arm_current_cc;
562 rtx arm_target_insn;
563 int arm_target_label;
564 /* The number of conditionally executed insns, including the current insn. */
565 int arm_condexec_count = 0;
566 /* A bitmask specifying the patterns for the IT block.
567 Zero means do not output an IT block before this insn. */
568 int arm_condexec_mask = 0;
569 /* The number of bits used in arm_condexec_mask. */
570 int arm_condexec_masklen = 0;
572 /* The condition codes of the ARM, and the inverse function. */
573 static const char * const arm_condition_codes[] =
575 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
576 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
579 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
580 #define streq(string1, string2) (strcmp (string1, string2) == 0)
582 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
583 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
584 | (1 << PIC_OFFSET_TABLE_REGNUM)))
586 /* Initialization code. */
588 struct processors
590 const char *const name;
591 enum processor_type core;
592 const char *arch;
593 const unsigned long flags;
594 bool (* rtx_costs) (rtx, int, int, int *);
597 /* Not all of these give usefully different compilation alternatives,
598 but there is no simple way of generalizing them. */
599 static const struct processors all_cores[] =
601 /* ARM Cores */
602 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
603 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
604 #include "arm-cores.def"
605 #undef ARM_CORE
606 {NULL, arm_none, NULL, 0, NULL}
609 static const struct processors all_architectures[] =
611 /* ARM Architectures */
612 /* We don't specify rtx_costs here as it will be figured out
613 from the core. */
615 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
616 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
617 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
618 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
619 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
620 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
621 implementations that support it, so we will leave it out for now. */
622 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
623 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
624 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
625 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
626 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
627 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
628 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
629 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
630 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
631 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
632 {"armv6t2", arm1156t2s, "6T2", FL_CO_PROC | FL_FOR_ARCH6T2, NULL},
633 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
634 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
635 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
636 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
637 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
638 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
639 {NULL, arm_none, NULL, 0 , NULL}
642 struct arm_cpu_select
644 const char * string;
645 const char * name;
646 const struct processors * processors;
649 /* This is a magic structure. The 'string' field is magically filled in
650 with a pointer to the value specified by the user on the command line
651 assuming that the user has specified such a value. */
653 static struct arm_cpu_select arm_select[] =
655 /* string name processors */
656 { NULL, "-mcpu=", all_cores },
657 { NULL, "-march=", all_architectures },
658 { NULL, "-mtune=", all_cores }
661 /* Defines representing the indexes into the above table. */
662 #define ARM_OPT_SET_CPU 0
663 #define ARM_OPT_SET_ARCH 1
664 #define ARM_OPT_SET_TUNE 2
666 /* The name of the preprocessor macro to define for this architecture. */
668 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
670 struct fpu_desc
672 const char * name;
673 enum fputype fpu;
677 /* Available values for -mfpu=. */
679 static const struct fpu_desc all_fpus[] =
681 {"fpa", FPUTYPE_FPA},
682 {"fpe2", FPUTYPE_FPA_EMU2},
683 {"fpe3", FPUTYPE_FPA_EMU2},
684 {"maverick", FPUTYPE_MAVERICK},
685 {"vfp", FPUTYPE_VFP},
686 {"vfp3", FPUTYPE_VFP3},
687 {"neon", FPUTYPE_NEON}
691 /* Floating point models used by the different hardware.
692 See fputype in arm.h. */
694 static const enum fputype fp_model_for_fpu[] =
696 /* No FP hardware. */
697 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
698 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
699 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
700 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
701 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
702 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */
703 ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */
704 ARM_FP_MODEL_VFP /* FPUTYPE_NEON */
708 struct float_abi
710 const char * name;
711 enum float_abi_type abi_type;
715 /* Available values for -mfloat-abi=. */
717 static const struct float_abi all_float_abis[] =
719 {"soft", ARM_FLOAT_ABI_SOFT},
720 {"softfp", ARM_FLOAT_ABI_SOFTFP},
721 {"hard", ARM_FLOAT_ABI_HARD}
725 struct abi_name
727 const char *name;
728 enum arm_abi_type abi_type;
732 /* Available values for -mabi=. */
734 static const struct abi_name arm_all_abis[] =
736 {"apcs-gnu", ARM_ABI_APCS},
737 {"atpcs", ARM_ABI_ATPCS},
738 {"aapcs", ARM_ABI_AAPCS},
739 {"iwmmxt", ARM_ABI_IWMMXT},
740 {"aapcs-linux", ARM_ABI_AAPCS_LINUX}
743 /* Supported TLS relocations. */
745 enum tls_reloc {
746 TLS_GD32,
747 TLS_LDM32,
748 TLS_LDO32,
749 TLS_IE32,
750 TLS_LE32
753 /* Emit an insn that's a simple single-set. Both the operands must be known
754 to be valid. */
755 inline static rtx
756 emit_set_insn (rtx x, rtx y)
758 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
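/* For example, emit_set_insn (reg, GEN_INT (0)) emits the single insn
   (set (reg:SI rN) (const_int 0)); the caller must already know that
   both operands are valid for such a SET.  */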
761 /* Return the number of bits set in VALUE. */
762 static unsigned
763 bit_count (unsigned long value)
765 unsigned long count = 0;
767 while (value)
769 count++;
770 value &= value - 1; /* Clear the least-significant set bit. */
773 return count;
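/* The loop above is Kernighan's trick: "value &= value - 1" clears one
   set bit per iteration, so e.g. bit_count (0x29) returns 3 after three
   passes round the loop.  */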
776 /* Set up library functions unique to ARM. */
778 static void
779 arm_init_libfuncs (void)
781 /* There are no special library functions unless we are using the
782 ARM BPABI. */
783 if (!TARGET_BPABI)
784 return;
786 /* The functions below are described in Section 4 of the "Run-Time
787 ABI for the ARM architecture", Version 1.0. */
789 /* Double-precision floating-point arithmetic. Table 2. */
790 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
791 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
792 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
793 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
794 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
796 /* Double-precision comparisons. Table 3. */
797 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
798 set_optab_libfunc (ne_optab, DFmode, NULL);
799 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
800 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
801 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
802 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
803 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
805 /* Single-precision floating-point arithmetic. Table 4. */
806 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
807 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
808 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
809 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
810 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
812 /* Single-precision comparisons. Table 5. */
813 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
814 set_optab_libfunc (ne_optab, SFmode, NULL);
815 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
816 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
817 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
818 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
819 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
821 /* Floating-point to integer conversions. Table 6. */
822 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
823 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
824 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
825 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
826 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
827 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
828 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
829 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
831 /* Conversions between floating types. Table 7. */
832 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
833 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
835 /* Integer to floating-point conversions. Table 8. */
836 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
837 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
838 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
839 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
840 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
841 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
842 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
843 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
845 /* Long long. Table 9. */
846 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
847 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
848 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
849 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
850 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
851 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
852 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
853 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
855 /* Integer (32/32->32) division. \S 4.3.1. */
856 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
857 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
859 /* The divmod functions are designed so that they can be used for
860 plain division, even though they return both the quotient and the
861 remainder. The quotient is returned in the usual location (i.e.,
862 r0 for SImode, {r0, r1} for DImode), just as would be expected
863 for an ordinary division routine. Because the AAPCS calling
864 conventions specify that all of { r0, r1, r2, r3 } are
 865 call-clobbered registers, there is no need to tell the compiler
866 explicitly that those registers are clobbered by these
867 routines. */
868 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
869 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
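/* So, for example, a plain DImode division such as "a / b" becomes a
   call to __aeabi_ldivmod: the quotient comes back in {r0, r1} as for
   any DImode return value, and the remainder (returned in {r2, r3}
   under the run-time ABI) is simply ignored.  */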
871 /* For SImode division the ABI provides div-without-mod routines,
872 which are faster. */
873 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
874 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
876 /* We don't have mod libcalls. Fortunately gcc knows how to use the
877 divmod libcalls instead. */
878 set_optab_libfunc (smod_optab, DImode, NULL);
879 set_optab_libfunc (umod_optab, DImode, NULL);
880 set_optab_libfunc (smod_optab, SImode, NULL);
881 set_optab_libfunc (umod_optab, SImode, NULL);
884 /* Implement TARGET_HANDLE_OPTION. */
886 static bool
887 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
889 switch (code)
891 case OPT_march_:
892 arm_select[1].string = arg;
893 return true;
895 case OPT_mcpu_:
896 arm_select[0].string = arg;
897 return true;
899 case OPT_mhard_float:
900 target_float_abi_name = "hard";
901 return true;
903 case OPT_msoft_float:
904 target_float_abi_name = "soft";
905 return true;
907 case OPT_mtune_:
908 arm_select[2].string = arg;
909 return true;
911 default:
912 return true;
916 static void
917 arm_target_help (void)
919 int i;
920 static int columns = 0;
921 int remaining;
923 /* If we have not done so already, obtain the desired maximum width of
924 the output. Note - this is a duplication of the code at the start of
925 gcc/opts.c:print_specific_help() - the two copies should probably be
926 replaced by a single function. */
927 if (columns == 0)
929 const char *p;
931 GET_ENVIRONMENT (p, "COLUMNS");
932 if (p != NULL)
934 int value = atoi (p);
936 if (value > 0)
937 columns = value;
940 if (columns == 0)
941 /* Use a reasonable default. */
942 columns = 80;
945 printf (" Known ARM CPUs (for use with the -mcpu= and -mtune= options):\n");
947 /* The - 2 is because we know that the last entry in the array is NULL. */
948 i = ARRAY_SIZE (all_cores) - 2;
949 gcc_assert (i > 0);
950 printf (" %s", all_cores[i].name);
951 remaining = columns - (strlen (all_cores[i].name) + 4);
952 gcc_assert (remaining >= 0);
954 while (i--)
956 int len = strlen (all_cores[i].name);
958 if (remaining > len + 2)
960 printf (", %s", all_cores[i].name);
961 remaining -= len + 2;
963 else
965 if (remaining > 0)
966 printf (",");
967 printf ("\n %s", all_cores[i].name);
968 remaining = columns - (len + 4);
972 printf ("\n\n Known ARM architectures (for use with the -march= option):\n");
974 i = ARRAY_SIZE (all_architectures) - 2;
975 gcc_assert (i > 0);
977 printf (" %s", all_architectures[i].name);
978 remaining = columns - (strlen (all_architectures[i].name) + 4);
979 gcc_assert (remaining >= 0);
981 while (i--)
983 int len = strlen (all_architectures[i].name);
985 if (remaining > len + 2)
987 printf (", %s", all_architectures[i].name);
988 remaining -= len + 2;
990 else
992 if (remaining > 0)
993 printf (",");
994 printf ("\n %s", all_architectures[i].name);
995 remaining = columns - (len + 4);
998 printf ("\n");
1002 /* Fix up any incompatible options that the user has specified.
1003 This has now turned into a maze. */
1004 void
1005 arm_override_options (void)
1007 unsigned i;
1008 enum processor_type target_arch_cpu = arm_none;
1010 /* Set up the flags based on the cpu/architecture selected by the user. */
1011 for (i = ARRAY_SIZE (arm_select); i--;)
1013 struct arm_cpu_select * ptr = arm_select + i;
1015 if (ptr->string != NULL && ptr->string[0] != '\0')
1017 const struct processors * sel;
1019 for (sel = ptr->processors; sel->name != NULL; sel++)
1020 if (streq (ptr->string, sel->name))
1022 /* Set the architecture define. */
1023 if (i != ARM_OPT_SET_TUNE)
1024 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1026 /* Determine the processor core for which we should
1027 tune code-generation. */
1028 if (/* -mcpu= is a sensible default. */
1029 i == ARM_OPT_SET_CPU
1030 /* -mtune= overrides -mcpu= and -march=. */
1031 || i == ARM_OPT_SET_TUNE)
1032 arm_tune = (enum processor_type) (sel - ptr->processors);
1034 /* Remember the CPU associated with this architecture.
1035 If no other option is used to set the CPU type,
1036 we'll use this to guess the most suitable tuning
1037 options. */
1038 if (i == ARM_OPT_SET_ARCH)
1039 target_arch_cpu = sel->core;
1041 if (i != ARM_OPT_SET_TUNE)
1043 /* If we have been given an architecture and a processor
1044 make sure that they are compatible. We only generate
1045 a warning though, and we prefer the CPU over the
1046 architecture. */
1047 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1048 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1049 ptr->string);
1051 insn_flags = sel->flags;
1054 break;
1057 if (sel->name == NULL)
1058 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1062 /* Guess the tuning options from the architecture if necessary. */
1063 if (arm_tune == arm_none)
1064 arm_tune = target_arch_cpu;
1066 /* If the user did not specify a processor, choose one for them. */
1067 if (insn_flags == 0)
1069 const struct processors * sel;
1070 unsigned int sought;
1071 enum processor_type cpu;
1073 cpu = TARGET_CPU_DEFAULT;
1074 if (cpu == arm_none)
1076 #ifdef SUBTARGET_CPU_DEFAULT
1077 /* Use the subtarget default CPU if none was specified by
1078 configure. */
1079 cpu = SUBTARGET_CPU_DEFAULT;
1080 #endif
1081 /* Default to ARM6. */
1082 if (cpu == arm_none)
1083 cpu = arm6;
1085 sel = &all_cores[cpu];
1087 insn_flags = sel->flags;
1089 /* Now check to see if the user has specified some command line
1090      switch that requires certain abilities from the cpu.  */
1091 sought = 0;
1093 if (TARGET_INTERWORK || TARGET_THUMB)
1095 sought |= (FL_THUMB | FL_MODE32);
1097 /* There are no ARM processors that support both APCS-26 and
1098 interworking. Therefore we force FL_MODE26 to be removed
1099 from insn_flags here (if it was set), so that the search
1100 below will always be able to find a compatible processor. */
1101 insn_flags &= ~FL_MODE26;
1104 if (sought != 0 && ((sought & insn_flags) != sought))
1106 /* Try to locate a CPU type that supports all of the abilities
1107 of the default CPU, plus the extra abilities requested by
1108 the user. */
1109 for (sel = all_cores; sel->name != NULL; sel++)
1110 if ((sel->flags & sought) == (sought | insn_flags))
1111 break;
1113 if (sel->name == NULL)
1115 unsigned current_bit_count = 0;
1116 const struct processors * best_fit = NULL;
1118 /* Ideally we would like to issue an error message here
1119 saying that it was not possible to find a CPU compatible
1120 with the default CPU, but which also supports the command
1121 line options specified by the programmer, and so they
1122 ought to use the -mcpu=<name> command line option to
1123 override the default CPU type.
1125 If we cannot find a cpu that has both the
1126 characteristics of the default cpu and the given
1127 command line options we scan the array again looking
1128 for a best match. */
1129 for (sel = all_cores; sel->name != NULL; sel++)
1130 if ((sel->flags & sought) == sought)
1132 unsigned count;
1134 count = bit_count (sel->flags & insn_flags);
1136 if (count >= current_bit_count)
1138 best_fit = sel;
1139 current_bit_count = count;
1143 gcc_assert (best_fit);
1144 sel = best_fit;
1147 insn_flags = sel->flags;
1149 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1150 arm_default_cpu = (enum processor_type) (sel - all_cores);
1151 if (arm_tune == arm_none)
1152 arm_tune = arm_default_cpu;
1155 /* The processor for which we should tune should now have been
1156 chosen. */
1157 gcc_assert (arm_tune != arm_none);
1159 tune_flags = all_cores[(int)arm_tune].flags;
1160 if (optimize_size)
1161 targetm.rtx_costs = arm_size_rtx_costs;
1162 else
1163 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1165 /* Make sure that the processor choice does not conflict with any of the
1166 other command line choices. */
1167 if (TARGET_ARM && !(insn_flags & FL_NOTM))
1168 error ("target CPU does not support ARM mode");
1170 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1172 warning (0, "target CPU does not support interworking" );
1173 target_flags &= ~MASK_INTERWORK;
1176 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1178 warning (0, "target CPU does not support THUMB instructions");
1179 target_flags &= ~MASK_THUMB;
1182 if (TARGET_APCS_FRAME && TARGET_THUMB)
1184 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1185 target_flags &= ~MASK_APCS_FRAME;
1188 /* Callee super interworking implies thumb interworking. Adding
1189 this to the flags here simplifies the logic elsewhere. */
1190 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1191 target_flags |= MASK_INTERWORK;
1193 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1194 from here where no function is being compiled currently. */
1195 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1196 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1198 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1199 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1201 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1202 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1204 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1206 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1207 target_flags |= MASK_APCS_FRAME;
1210 if (TARGET_POKE_FUNCTION_NAME)
1211 target_flags |= MASK_APCS_FRAME;
1213 if (TARGET_APCS_REENT && flag_pic)
1214 error ("-fpic and -mapcs-reent are incompatible");
1216 if (TARGET_APCS_REENT)
1217 warning (0, "APCS reentrant code not supported. Ignored");
1219 /* If this target is normally configured to use APCS frames, warn if they
1220 are turned off and debugging is turned on. */
1221 if (TARGET_ARM
1222 && write_symbols != NO_DEBUG
1223 && !TARGET_APCS_FRAME
1224 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1225 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1227 if (TARGET_APCS_FLOAT)
1228 warning (0, "passing floating point arguments in fp regs not yet supported");
1230 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1231 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1232 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1233 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1234 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1235 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1236 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1237 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1238 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1239 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1240 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1241 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1243 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1244 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1245 thumb_code = (TARGET_ARM == 0);
1246 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1247 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1248 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1249 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1251 /* V5 code we generate is completely interworking capable, so we turn off
1252 TARGET_INTERWORK here to avoid many tests later on. */
1254 /* XXX However, we must pass the right pre-processor defines to CPP
1255 or GLD can get confused. This is a hack. */
1256 if (TARGET_INTERWORK)
1257 arm_cpp_interwork = 1;
1259 if (arm_arch5)
1260 target_flags &= ~MASK_INTERWORK;
1262 if (target_abi_name)
1264 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1266 if (streq (arm_all_abis[i].name, target_abi_name))
1268 arm_abi = arm_all_abis[i].abi_type;
1269 break;
1272 if (i == ARRAY_SIZE (arm_all_abis))
1273 error ("invalid ABI option: -mabi=%s", target_abi_name);
1275 else
1276 arm_abi = ARM_DEFAULT_ABI;
1278 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1279 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1281 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1282 error ("iwmmxt abi requires an iwmmxt capable cpu");
1284 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1285 if (target_fpu_name == NULL && target_fpe_name != NULL)
1287 if (streq (target_fpe_name, "2"))
1288 target_fpu_name = "fpe2";
1289 else if (streq (target_fpe_name, "3"))
1290 target_fpu_name = "fpe3";
1291 else
1292 error ("invalid floating point emulation option: -mfpe=%s",
1293 target_fpe_name);
1295 if (target_fpu_name != NULL)
1297 /* The user specified a FPU. */
1298 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1300 if (streq (all_fpus[i].name, target_fpu_name))
1302 arm_fpu_arch = all_fpus[i].fpu;
1303 arm_fpu_tune = arm_fpu_arch;
1304 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1305 break;
1308 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1309 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1311 else
1313 #ifdef FPUTYPE_DEFAULT
1314 /* Use the default if it is specified for this platform. */
1315 arm_fpu_arch = FPUTYPE_DEFAULT;
1316 arm_fpu_tune = FPUTYPE_DEFAULT;
1317 #else
1318 /* Pick one based on CPU type. */
1319 /* ??? Some targets assume FPA is the default.
1320 if ((insn_flags & FL_VFP) != 0)
1321 arm_fpu_arch = FPUTYPE_VFP;
1322 else
1324 if (arm_arch_cirrus)
1325 arm_fpu_arch = FPUTYPE_MAVERICK;
1326 else
1327 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1328 #endif
1329 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1330 arm_fpu_tune = FPUTYPE_FPA;
1331 else
1332 arm_fpu_tune = arm_fpu_arch;
1333 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1334 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1337 if (target_float_abi_name != NULL)
1339 /* The user specified a FP ABI. */
1340 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1342 if (streq (all_float_abis[i].name, target_float_abi_name))
1344 arm_float_abi = all_float_abis[i].abi_type;
1345 break;
1348 if (i == ARRAY_SIZE (all_float_abis))
1349 error ("invalid floating point abi: -mfloat-abi=%s",
1350 target_float_abi_name);
1352 else
1353 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1355 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1356 sorry ("-mfloat-abi=hard and VFP");
1358 /* FPA and iWMMXt are incompatible because the insn encodings overlap.
1359 VFP and iWMMXt can theoretically coexist, but it's unlikely such silicon
1360 will ever exist. GCC makes no attempt to support this combination. */
1361 if (TARGET_IWMMXT && !TARGET_SOFT_FLOAT)
1362 sorry ("iWMMXt and hardware floating point");
1364 /* ??? iWMMXt insn patterns need auditing for Thumb-2. */
1365 if (TARGET_THUMB2 && TARGET_IWMMXT)
1366 sorry ("Thumb-2 iWMMXt");
1368 /* If soft-float is specified then don't use FPU. */
1369 if (TARGET_SOFT_FLOAT)
1370 arm_fpu_arch = FPUTYPE_NONE;
1372 /* For arm2/3 there is no need to do any scheduling if there is only
1373 a floating point emulator, or we are doing software floating-point. */
1374 if ((TARGET_SOFT_FLOAT
1375 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1376 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1377 && (tune_flags & FL_MODE32) == 0)
1378 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1380 if (target_thread_switch)
1382 if (strcmp (target_thread_switch, "soft") == 0)
1383 target_thread_pointer = TP_SOFT;
1384 else if (strcmp (target_thread_switch, "auto") == 0)
1385 target_thread_pointer = TP_AUTO;
1386 else if (strcmp (target_thread_switch, "cp15") == 0)
1387 target_thread_pointer = TP_CP15;
1388 else
1389 error ("invalid thread pointer option: -mtp=%s", target_thread_switch);
1392 /* Use the cp15 method if it is available. */
1393 if (target_thread_pointer == TP_AUTO)
1395 if (arm_arch6k && !TARGET_THUMB)
1396 target_thread_pointer = TP_CP15;
1397 else
1398 target_thread_pointer = TP_SOFT;
1401 if (TARGET_HARD_TP && TARGET_THUMB1)
1402 error ("can not use -mtp=cp15 with 16-bit Thumb");
1404 /* Override the default structure alignment for AAPCS ABI. */
1405 if (TARGET_AAPCS_BASED)
1406 arm_structure_size_boundary = 8;
1408 if (structure_size_string != NULL)
1410 int size = strtol (structure_size_string, NULL, 0);
1412 if (size == 8 || size == 32
1413 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1414 arm_structure_size_boundary = size;
1415 else
1416 warning (0, "structure size boundary can only be set to %s",
1417 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1420 if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic)
1422 error ("RTP PIC is incompatible with Thumb");
1423 flag_pic = 0;
1426 /* If stack checking is disabled, we can use r10 as the PIC register,
1427 which keeps r9 available. The EABI specifies r9 as the PIC register. */
1428 if (flag_pic && TARGET_SINGLE_PIC_BASE)
1430 if (TARGET_VXWORKS_RTP)
1431 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
1432 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
1435 if (flag_pic && TARGET_VXWORKS_RTP)
1436 arm_pic_register = 9;
1438 if (arm_pic_register_string != NULL)
1440 int pic_register = decode_reg_name (arm_pic_register_string);
1442 if (!flag_pic)
1443 warning (0, "-mpic-register= is useless without -fpic");
1445 /* Prevent the user from choosing an obviously stupid PIC register. */
1446 else if (pic_register < 0 || call_used_regs[pic_register]
1447 || pic_register == HARD_FRAME_POINTER_REGNUM
1448 || pic_register == STACK_POINTER_REGNUM
1449 || pic_register >= PC_REGNUM
1450 || (TARGET_VXWORKS_RTP
1451 && (unsigned int) pic_register != arm_pic_register))
1452 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1453 else
1454 arm_pic_register = pic_register;
1457 /* ??? We might want scheduling for thumb2. */
1458 if (TARGET_THUMB && flag_schedule_insns)
1460 /* Don't warn since it's on by default in -O2. */
1461 flag_schedule_insns = 0;
1464 if (optimize_size)
1466 arm_constant_limit = 1;
1468 /* If optimizing for size, bump the number of instructions that we
1469 are prepared to conditionally execute (even on a StrongARM). */
1470 max_insns_skipped = 6;
1472 else
1474 /* For processors with load scheduling, it never costs more than
1475 2 cycles to load a constant, and the load scheduler may well
1476 reduce that to 1. */
1477 if (arm_ld_sched)
1478 arm_constant_limit = 1;
1480 /* On XScale the longer latency of a load makes it more difficult
1481 to achieve a good schedule, so it's faster to synthesize
1482 constants that can be done in two insns. */
1483 if (arm_tune_xscale)
1484 arm_constant_limit = 2;
1486 /* StrongARM has early execution of branches, so a sequence
1487 that is worth skipping is shorter. */
1488 if (arm_tune_strongarm)
1489 max_insns_skipped = 3;
1492 /* Register global variables with the garbage collector. */
1493 arm_add_gc_roots ();
1496 static void
1497 arm_add_gc_roots (void)
1499 gcc_obstack_init(&minipool_obstack);
1500 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1503 /* A table of known ARM exception types.
1504 For use with the interrupt function attribute. */
1506 typedef struct
1508 const char *const arg;
1509 const unsigned long return_value;
1511 isr_attribute_arg;
1513 static const isr_attribute_arg isr_attribute_args [] =
1515 { "IRQ", ARM_FT_ISR },
1516 { "irq", ARM_FT_ISR },
1517 { "FIQ", ARM_FT_FIQ },
1518 { "fiq", ARM_FT_FIQ },
1519 { "ABORT", ARM_FT_ISR },
1520 { "abort", ARM_FT_ISR },
1521 { "ABORT", ARM_FT_ISR },
1522 { "abort", ARM_FT_ISR },
1523 { "UNDEF", ARM_FT_EXCEPTION },
1524 { "undef", ARM_FT_EXCEPTION },
1525 { "SWI", ARM_FT_EXCEPTION },
1526 { "swi", ARM_FT_EXCEPTION },
1527 { NULL, ARM_FT_NORMAL }
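/* These strings are the arguments accepted by the "isr"/"interrupt"
   function attribute; for example

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   is looked up in this table and gives the function type ARM_FT_ISR.  */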
1530 /* Returns the (interrupt) function type of the current
1531 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1533 static unsigned long
1534 arm_isr_value (tree argument)
1536 const isr_attribute_arg * ptr;
1537 const char * arg;
1539 if (!arm_arch_notm)
1540 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
1542 /* No argument - default to IRQ. */
1543 if (argument == NULL_TREE)
1544 return ARM_FT_ISR;
1546 /* Get the value of the argument. */
1547 if (TREE_VALUE (argument) == NULL_TREE
1548 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1549 return ARM_FT_UNKNOWN;
1551 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1553 /* Check it against the list of known arguments. */
1554 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1555 if (streq (arg, ptr->arg))
1556 return ptr->return_value;
1558 /* An unrecognized interrupt type. */
1559 return ARM_FT_UNKNOWN;
1562 /* Computes the type of the current function. */
1564 static unsigned long
1565 arm_compute_func_type (void)
1567 unsigned long type = ARM_FT_UNKNOWN;
1568 tree a;
1569 tree attr;
1571 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1573 /* Decide if the current function is volatile. Such functions
1574 never return, and many memory cycles can be saved by not storing
1575 register values that will never be needed again. This optimization
1576 was added to speed up context switching in a kernel application. */
1577 if (optimize > 0
1578 && (TREE_NOTHROW (current_function_decl)
1579 || !(flag_unwind_tables
1580 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS)))
1581 && TREE_THIS_VOLATILE (current_function_decl))
1582 type |= ARM_FT_VOLATILE;
1584 if (cfun->static_chain_decl != NULL)
1585 type |= ARM_FT_NESTED;
1587 attr = DECL_ATTRIBUTES (current_function_decl);
1589 a = lookup_attribute ("naked", attr);
1590 if (a != NULL_TREE)
1591 type |= ARM_FT_NAKED;
1593 a = lookup_attribute ("isr", attr);
1594 if (a == NULL_TREE)
1595 a = lookup_attribute ("interrupt", attr);
1597 if (a == NULL_TREE)
1598 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1599 else
1600 type |= arm_isr_value (TREE_VALUE (a));
1602 return type;
1605 /* Returns the type of the current function. */
1607 unsigned long
1608 arm_current_func_type (void)
1610 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1611 cfun->machine->func_type = arm_compute_func_type ();
1613 return cfun->machine->func_type;
1616 /* Return 1 if it is possible to return using a single instruction.
1617 If SIBLING is non-null, this is a test for a return before a sibling
1618 call. SIBLING is the call insn, so we can examine its register usage. */
1620 int
1621 use_return_insn (int iscond, rtx sibling)
1623 int regno;
1624 unsigned int func_type;
1625 unsigned long saved_int_regs;
1626 unsigned HOST_WIDE_INT stack_adjust;
1627 arm_stack_offsets *offsets;
1629 /* Never use a return instruction before reload has run. */
1630 if (!reload_completed)
1631 return 0;
1633 func_type = arm_current_func_type ();
1635 /* Naked, volatile and stack alignment functions need special
1636 consideration. */
1637 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
1638 return 0;
1640 /* So do interrupt functions that use the frame pointer and Thumb
1641 interrupt functions. */
1642 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
1643 return 0;
1645 offsets = arm_get_frame_offsets ();
1646 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1648 /* As do variadic functions. */
1649 if (current_function_pretend_args_size
1650 || cfun->machine->uses_anonymous_args
1651 /* Or if the function calls __builtin_eh_return () */
1652 || current_function_calls_eh_return
1653 /* Or if the function calls alloca */
1654 || current_function_calls_alloca
1655 /* Or if there is a stack adjustment. However, if the stack pointer
1656 is saved on the stack, we can use a pre-incrementing stack load. */
1657 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1658 return 0;
1660 saved_int_regs = arm_compute_save_reg_mask ();
1662 /* Unfortunately, the insn
1664 ldmib sp, {..., sp, ...}
1666 triggers a bug on most SA-110 based devices, such that the stack
1667 pointer won't be correctly restored if the instruction takes a
1668 page fault. We work around this problem by popping r3 along with
1669 the other registers, since that is never slower than executing
1670 another instruction.
1672 We test for !arm_arch5 here, because code for any architecture
1673 less than this could potentially be run on one of the buggy
1674 chips. */
1675 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
1677 /* Validate that r3 is a call-clobbered register (always true in
1678 the default ABI) ... */
1679 if (!call_used_regs[3])
1680 return 0;
1682 /* ... that it isn't being used for a return value ... */
1683 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1684 return 0;
1686 /* ... or for a tail-call argument ... */
1687 if (sibling)
1689 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1691 if (find_regno_fusage (sibling, USE, 3))
1692 return 0;
1695 /* ... and that there are no call-saved registers in r0-r2
1696 (always true in the default ABI). */
1697 if (saved_int_regs & 0x7)
1698 return 0;
1701 /* Can't be done if interworking with Thumb, and any registers have been
1702 stacked. */
1703 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
1704 return 0;
1706 /* On StrongARM, conditional returns are expensive if they aren't
1707 taken and multiple registers have been stacked. */
1708 if (iscond && arm_tune_strongarm)
1710 /* Conditional return when just the LR is stored is a simple
1711 conditional-load instruction, that's not expensive. */
1712 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1713 return 0;
1715 if (flag_pic
1716 && arm_pic_register != INVALID_REGNUM
1717 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
1718 return 0;
1721 /* If there are saved registers but the LR isn't saved, then we need
1722 two instructions for the return. */
1723 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1724 return 0;
1726 /* Can't be done if any of the FPA regs are pushed,
1727 since this also requires an insn. */
1728 if (TARGET_HARD_FLOAT && TARGET_FPA)
1729 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1730 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1731 return 0;
1733 /* Likewise VFP regs. */
1734 if (TARGET_HARD_FLOAT && TARGET_VFP)
1735 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1736 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1737 return 0;
1739 if (TARGET_REALLY_IWMMXT)
1740 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1741 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
1742 return 0;
1744 return 1;
1747 /* Return TRUE if int I is a valid immediate ARM constant. */
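/* For illustration (not part of the original source): an ARM data-processing
   immediate is an 8-bit value rotated right by an even amount, so constants
   such as 0xff, 0xff00 and 0xf000000f are representable while 0x101 is not.
   Thumb-2 additionally accepts replicated byte patterns such as 0x00ff00ff,
   which is what the repeated-pattern check below handles. */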
1750 const_ok_for_arm (HOST_WIDE_INT i)
1752 int lowbit;
1754 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1755 be all zero, or all one. */
1756 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1757 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1758 != ((~(unsigned HOST_WIDE_INT) 0)
1759 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1760 return FALSE;
1762 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1764 /* Fast return for 0 and small values. We must do this for zero, since
1765 the code below can't handle that one case. */
1766 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1767 return TRUE;
1769 /* Get the number of trailing zeros. */
1770 lowbit = ffs((int) i) - 1;
1772 /* Only even shifts are allowed in ARM mode, so round down to the
1773 nearest even number. */
1774 if (TARGET_ARM)
1775 lowbit &= ~1;
1777 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1778 return TRUE;
1780 if (TARGET_ARM)
1782 /* Allow rotated constants in ARM mode. */
1783 if (lowbit <= 4
1784 && ((i & ~0xc000003f) == 0
1785 || (i & ~0xf000000f) == 0
1786 || (i & ~0xfc000003) == 0))
1787 return TRUE;
1789 else
1791 HOST_WIDE_INT v;
1793 /* Allow repeated pattern. */
1794 v = i & 0xff;
1795 v |= v << 16;
1796 if (i == v || i == (v | (v << 8)))
1797 return TRUE;
1800 return FALSE;
1803 /* Return true if I is a valid constant for the operation CODE. */
1804 static int
1805 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1807 if (const_ok_for_arm (i))
1808 return 1;
1810 switch (code)
1812 case PLUS:
1813 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1815 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1816 case XOR:
1817 case IOR:
1818 return 0;
1820 case AND:
1821 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1823 default:
1824 gcc_unreachable ();
1828 /* Emit a sequence of insns to handle a large constant.
1829 CODE is the code of the operation required, it can be any of SET, PLUS,
1830 IOR, AND, XOR, MINUS;
1831 MODE is the mode in which the operation is being performed;
1832 VAL is the integer to operate on;
1833 SOURCE is the other operand (a register, or a null-pointer for SET);
1834 SUBTARGETS means it is safe to create scratch registers if that will
1835 either produce a simpler sequence, or we will want to cse the values.
1836 Return value is the number of insns emitted. */
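/* For illustration (not part of the original source): a constant such as
   0x00010002 has no single-immediate encoding, so with CODE == SET it is
   synthesized chunk by chunk, roughly 'mov rd, #0x10000' followed by
   'add rd, rd, #2'. */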
1838 /* ??? Tweak this for thumb2. */
1840 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1841 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1843 rtx cond;
1845 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1846 cond = COND_EXEC_TEST (PATTERN (insn));
1847 else
1848 cond = NULL_RTX;
1850 if (subtargets || code == SET
1851 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1852 && REGNO (target) != REGNO (source)))
1854 /* After arm_reorg has been called, we can't fix up expensive
1855 constants by pushing them into memory so we must synthesize
1856 them in-line, regardless of the cost. This is only likely to
1857 be more costly on chips that have load delay slots and we are
1858 compiling without running the scheduler (so no splitting
1859 occurred before the final instruction emission).
1861 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1863 if (!after_arm_reorg
1864 && !cond
1865 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1866 1, 0)
1867 > arm_constant_limit + (code != SET)))
1869 if (code == SET)
1871 /* Currently SET is the only monadic value for CODE; all
1872 the rest are dyadic. */
1873 emit_set_insn (target, GEN_INT (val));
1874 return 1;
1876 else
1878 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1880 emit_set_insn (temp, GEN_INT (val));
1881 /* For MINUS, the register operand is subtracted from the constant
1882 (VAL - SOURCE), since we never have subtraction of a constant. */
1883 if (code == MINUS)
1884 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
1885 else
1886 emit_set_insn (target,
1887 gen_rtx_fmt_ee (code, mode, source, temp));
1888 return 2;
1893 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1897 /* Return the number of ARM instructions required to synthesize the given
1898 constant. */
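/* Rough sketch of the loop below (this comment is not in the original):
   starting at bit position I, repeatedly peel off an up-to-8-bit window of
   set bits that fits one ARM immediate, clear those bits from REMAINDER,
   and count one instruction per window, wrapping from bit 0 back to bit 31
   until nothing remains. */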
1899 static int
1900 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1902 HOST_WIDE_INT temp1;
1903 int num_insns = 0;
1906 int end;
1908 if (i <= 0)
1909 i += 32;
1910 if (remainder & (3 << (i - 2)))
1912 end = i - 8;
1913 if (end < 0)
1914 end += 32;
1915 temp1 = remainder & ((0x0ff << end)
1916 | ((i < end) ? (0xff >> (32 - end)) : 0));
1917 remainder &= ~temp1;
1918 num_insns++;
1919 i -= 6;
1921 i -= 2;
1922 } while (remainder);
1923 return num_insns;
1926 /* Emit an instruction with the indicated PATTERN. If COND is
1927 non-NULL, conditionalize the execution of the instruction on COND
1928 being true. */
1930 static void
1931 emit_constant_insn (rtx cond, rtx pattern)
1933 if (cond)
1934 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1935 emit_insn (pattern);
1938 /* As above, but extra parameter GENERATE which, if clear, suppresses
1939 RTL generation. */
1940 /* ??? This needs more work for thumb2. */
1942 static int
1943 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1944 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1945 int generate)
1947 int can_invert = 0;
1948 int can_negate = 0;
1949 int can_negate_initial = 0;
1950 int can_shift = 0;
1951 int i;
1952 int num_bits_set = 0;
1953 int set_sign_bit_copies = 0;
1954 int clear_sign_bit_copies = 0;
1955 int clear_zero_bit_copies = 0;
1956 int set_zero_bit_copies = 0;
1957 int insns = 0;
1958 unsigned HOST_WIDE_INT temp1, temp2;
1959 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1961 /* Find out which operations are safe for a given CODE. Also do a quick
1962 check for degenerate cases; these can occur when DImode operations
1963 are split. */
1964 switch (code)
1966 case SET:
1967 can_invert = 1;
1968 can_shift = 1;
1969 can_negate = 1;
1970 break;
1972 case PLUS:
1973 can_negate = 1;
1974 can_negate_initial = 1;
1975 break;
1977 case IOR:
1978 if (remainder == 0xffffffff)
1980 if (generate)
1981 emit_constant_insn (cond,
1982 gen_rtx_SET (VOIDmode, target,
1983 GEN_INT (ARM_SIGN_EXTEND (val))));
1984 return 1;
1986 if (remainder == 0)
1988 if (reload_completed && rtx_equal_p (target, source))
1989 return 0;
1990 if (generate)
1991 emit_constant_insn (cond,
1992 gen_rtx_SET (VOIDmode, target, source));
1993 return 1;
1995 break;
1997 case AND:
1998 if (remainder == 0)
2000 if (generate)
2001 emit_constant_insn (cond,
2002 gen_rtx_SET (VOIDmode, target, const0_rtx));
2003 return 1;
2005 if (remainder == 0xffffffff)
2007 if (reload_completed && rtx_equal_p (target, source))
2008 return 0;
2009 if (generate)
2010 emit_constant_insn (cond,
2011 gen_rtx_SET (VOIDmode, target, source));
2012 return 1;
2014 can_invert = 1;
2015 break;
2017 case XOR:
2018 if (remainder == 0)
2020 if (reload_completed && rtx_equal_p (target, source))
2021 return 0;
2022 if (generate)
2023 emit_constant_insn (cond,
2024 gen_rtx_SET (VOIDmode, target, source));
2025 return 1;
2028 /* We don't know how to handle other cases yet. */
2029 gcc_assert (remainder == 0xffffffff);
2031 if (generate)
2032 emit_constant_insn (cond,
2033 gen_rtx_SET (VOIDmode, target,
2034 gen_rtx_NOT (mode, source)));
2035 return 1;
2037 case MINUS:
2038 /* We treat MINUS as (val - source), since (source - val) is always
2039 passed as (source + (-val)). */
2040 if (remainder == 0)
2042 if (generate)
2043 emit_constant_insn (cond,
2044 gen_rtx_SET (VOIDmode, target,
2045 gen_rtx_NEG (mode, source)));
2046 return 1;
2048 if (const_ok_for_arm (val))
2050 if (generate)
2051 emit_constant_insn (cond,
2052 gen_rtx_SET (VOIDmode, target,
2053 gen_rtx_MINUS (mode, GEN_INT (val),
2054 source)));
2055 return 1;
2057 can_negate = 1;
2059 break;
2061 default:
2062 gcc_unreachable ();
2065 /* If we can do it in one insn get out quickly. */
2066 if (const_ok_for_arm (val)
2067 || (can_negate_initial && const_ok_for_arm (-val))
2068 || (can_invert && const_ok_for_arm (~val)))
2070 if (generate)
2071 emit_constant_insn (cond,
2072 gen_rtx_SET (VOIDmode, target,
2073 (source
2074 ? gen_rtx_fmt_ee (code, mode, source,
2075 GEN_INT (val))
2076 : GEN_INT (val))));
2077 return 1;
2080 /* Calculate a few attributes that may be useful for specific
2081 optimizations. */
2082 for (i = 31; i >= 0; i--)
2084 if ((remainder & (1 << i)) == 0)
2085 clear_sign_bit_copies++;
2086 else
2087 break;
2090 for (i = 31; i >= 0; i--)
2092 if ((remainder & (1 << i)) != 0)
2093 set_sign_bit_copies++;
2094 else
2095 break;
2098 for (i = 0; i <= 31; i++)
2100 if ((remainder & (1 << i)) == 0)
2101 clear_zero_bit_copies++;
2102 else
2103 break;
2106 for (i = 0; i <= 31; i++)
2108 if ((remainder & (1 << i)) != 0)
2109 set_zero_bit_copies++;
2110 else
2111 break;
2114 switch (code)
2116 case SET:
2117 /* See if we can use movw. */
2118 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2120 if (generate)
2121 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2122 GEN_INT (val)));
2123 return 1;
2126 /* See if we can do this by sign_extending a constant that is known
2127 to be negative. This is a good way of doing it, since the shift
2128 may well merge into a subsequent insn. */
2129 if (set_sign_bit_copies > 1)
2131 if (const_ok_for_arm
2132 (temp1 = ARM_SIGN_EXTEND (remainder
2133 << (set_sign_bit_copies - 1))))
2135 if (generate)
2137 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2138 emit_constant_insn (cond,
2139 gen_rtx_SET (VOIDmode, new_src,
2140 GEN_INT (temp1)));
2141 emit_constant_insn (cond,
2142 gen_ashrsi3 (target, new_src,
2143 GEN_INT (set_sign_bit_copies - 1)));
2145 return 2;
2147 /* For an inverted constant, we will need to set the low bits;
2148 these will be shifted out of harm's way. */
2149 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
2150 if (const_ok_for_arm (~temp1))
2152 if (generate)
2154 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2155 emit_constant_insn (cond,
2156 gen_rtx_SET (VOIDmode, new_src,
2157 GEN_INT (temp1)));
2158 emit_constant_insn (cond,
2159 gen_ashrsi3 (target, new_src,
2160 GEN_INT (set_sign_bit_copies - 1)));
2162 return 2;
2166 /* See if we can calculate the value as the difference between two
2167 valid immediates. */
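/* For illustration (not part of the original source): 0x00ffffff is handled
   by this path as 0x01000000 minus 1, in effect 'mov rd, #0x1000000'
   followed by 'sub rd, rd, #1'. */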
2168 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
2170 int topshift = clear_sign_bit_copies & ~1;
2172 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
2173 & (0xff000000 >> topshift));
2175 /* If temp1 is zero, then that means the 9 most significant
2176 bits of remainder were 1 and we've caused it to overflow.
2177 When topshift is 0 we don't need to do anything since we
2178 can borrow from 'bit 32'. */
2179 if (temp1 == 0 && topshift != 0)
2180 temp1 = 0x80000000 >> (topshift - 1);
2182 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
2184 if (const_ok_for_arm (temp2))
2186 if (generate)
2188 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2189 emit_constant_insn (cond,
2190 gen_rtx_SET (VOIDmode, new_src,
2191 GEN_INT (temp1)));
2192 emit_constant_insn (cond,
2193 gen_addsi3 (target, new_src,
2194 GEN_INT (-temp2)));
2197 return 2;
2201 /* See if we can generate this by setting the bottom (or the top)
2202 16 bits, and then shifting these into the other half of the
2203 word. We only look for the simplest cases; doing more would cost
2204 too much. Be careful, however, not to generate this when the
2205 alternative would take fewer insns. */
2206 if (val & 0xffff0000)
2208 temp1 = remainder & 0xffff0000;
2209 temp2 = remainder & 0x0000ffff;
2211 /* Overlaps outside this range are best done using other methods. */
2212 for (i = 9; i < 24; i++)
2214 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
2215 && !const_ok_for_arm (temp2))
2217 rtx new_src = (subtargets
2218 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2219 : target);
2220 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
2221 source, subtargets, generate);
2222 source = new_src;
2223 if (generate)
2224 emit_constant_insn
2225 (cond,
2226 gen_rtx_SET
2227 (VOIDmode, target,
2228 gen_rtx_IOR (mode,
2229 gen_rtx_ASHIFT (mode, source,
2230 GEN_INT (i)),
2231 source)));
2232 return insns + 1;
2236 /* Don't duplicate cases already considered. */
2237 for (i = 17; i < 24; i++)
2239 if (((temp1 | (temp1 >> i)) == remainder)
2240 && !const_ok_for_arm (temp1))
2242 rtx new_src = (subtargets
2243 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
2244 : target);
2245 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2246 source, subtargets, generate);
2247 source = new_src;
2248 if (generate)
2249 emit_constant_insn
2250 (cond,
2251 gen_rtx_SET (VOIDmode, target,
2252 gen_rtx_IOR
2253 (mode,
2254 gen_rtx_LSHIFTRT (mode, source,
2255 GEN_INT (i)),
2256 source)));
2257 return insns + 1;
2261 break;
2263 case IOR:
2264 case XOR:
2265 /* If we have IOR or XOR, and the constant can be loaded in a
2266 single instruction, and we can find a temporary to put it in,
2267 then this can be done in two instructions instead of 3-4. */
2268 if (subtargets
2269 /* TARGET can't be NULL if SUBTARGETS is 0 */
2270 || (reload_completed && !reg_mentioned_p (target, source)))
2272 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2274 if (generate)
2276 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2278 emit_constant_insn (cond,
2279 gen_rtx_SET (VOIDmode, sub,
2280 GEN_INT (val)));
2281 emit_constant_insn (cond,
2282 gen_rtx_SET (VOIDmode, target,
2283 gen_rtx_fmt_ee (code, mode,
2284 source, sub)));
2286 return 2;
2290 if (code == XOR)
2291 break;
2293 if (set_sign_bit_copies > 8
2294 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2296 if (generate)
2298 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2299 rtx shift = GEN_INT (set_sign_bit_copies);
2301 emit_constant_insn
2302 (cond,
2303 gen_rtx_SET (VOIDmode, sub,
2304 gen_rtx_NOT (mode,
2305 gen_rtx_ASHIFT (mode,
2306 source,
2307 shift))));
2308 emit_constant_insn
2309 (cond,
2310 gen_rtx_SET (VOIDmode, target,
2311 gen_rtx_NOT (mode,
2312 gen_rtx_LSHIFTRT (mode, sub,
2313 shift))));
2315 return 2;
2318 if (set_zero_bit_copies > 8
2319 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2321 if (generate)
2323 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2324 rtx shift = GEN_INT (set_zero_bit_copies);
2326 emit_constant_insn
2327 (cond,
2328 gen_rtx_SET (VOIDmode, sub,
2329 gen_rtx_NOT (mode,
2330 gen_rtx_LSHIFTRT (mode,
2331 source,
2332 shift))));
2333 emit_constant_insn
2334 (cond,
2335 gen_rtx_SET (VOIDmode, target,
2336 gen_rtx_NOT (mode,
2337 gen_rtx_ASHIFT (mode, sub,
2338 shift))));
2340 return 2;
2343 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2345 if (generate)
2347 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2348 emit_constant_insn (cond,
2349 gen_rtx_SET (VOIDmode, sub,
2350 gen_rtx_NOT (mode, source)));
2351 source = sub;
2352 if (subtargets)
2353 sub = gen_reg_rtx (mode);
2354 emit_constant_insn (cond,
2355 gen_rtx_SET (VOIDmode, sub,
2356 gen_rtx_AND (mode, source,
2357 GEN_INT (temp1))));
2358 emit_constant_insn (cond,
2359 gen_rtx_SET (VOIDmode, target,
2360 gen_rtx_NOT (mode, sub)));
2362 return 3;
2364 break;
2366 case AND:
2367 /* See if two shifts will do 2 or more insns' worth of work. */
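/* For illustration (not part of the original source): masking with 0xffff
   falls out of this case as 'mov Rd, Rn, lsl #16' followed by
   'mov Rd, Rd, lsr #16', two instructions, instead of first synthesizing
   the 0xffff mask and then ANDing with it. */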
2368 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2370 HOST_WIDE_INT shift_mask = ((0xffffffff
2371 << (32 - clear_sign_bit_copies))
2372 & 0xffffffff);
2374 if ((remainder | shift_mask) != 0xffffffff)
2376 if (generate)
2378 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2379 insns = arm_gen_constant (AND, mode, cond,
2380 remainder | shift_mask,
2381 new_src, source, subtargets, 1);
2382 source = new_src;
2384 else
2386 rtx targ = subtargets ? NULL_RTX : target;
2387 insns = arm_gen_constant (AND, mode, cond,
2388 remainder | shift_mask,
2389 targ, source, subtargets, 0);
2393 if (generate)
2395 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2396 rtx shift = GEN_INT (clear_sign_bit_copies);
2398 emit_insn (gen_ashlsi3 (new_src, source, shift));
2399 emit_insn (gen_lshrsi3 (target, new_src, shift));
2402 return insns + 2;
2405 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2407 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2409 if ((remainder | shift_mask) != 0xffffffff)
2411 if (generate)
2413 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2415 insns = arm_gen_constant (AND, mode, cond,
2416 remainder | shift_mask,
2417 new_src, source, subtargets, 1);
2418 source = new_src;
2420 else
2422 rtx targ = subtargets ? NULL_RTX : target;
2424 insns = arm_gen_constant (AND, mode, cond,
2425 remainder | shift_mask,
2426 targ, source, subtargets, 0);
2430 if (generate)
2432 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2433 rtx shift = GEN_INT (clear_zero_bit_copies);
2435 emit_insn (gen_lshrsi3 (new_src, source, shift));
2436 emit_insn (gen_ashlsi3 (target, new_src, shift));
2439 return insns + 2;
2442 break;
2444 default:
2445 break;
2448 for (i = 0; i < 32; i++)
2449 if (remainder & (1 << i))
2450 num_bits_set++;
2452 if (code == AND || (can_invert && num_bits_set > 16))
2453 remainder = (~remainder) & 0xffffffff;
2454 else if (code == PLUS && num_bits_set > 16)
2455 remainder = (-remainder) & 0xffffffff;
2456 else
2458 can_invert = 0;
2459 can_negate = 0;
2462 /* Now try to find a way of doing the job in either two or three
2463 instructions.
2464 We start by looking for the largest block of zeros that is aligned on
2465 a 2-bit boundary; we then fill up the temps, wrapping around to the
2466 top of the word when we drop off the bottom.
2467 In the worst case this code should produce no more than four insns.
2468 Thumb-2 constants are shifted, not rotated, so the MSB is always the
2469 best place to start. */
2471 /* ??? Use thumb2 replicated constants when the high and low halfwords are
2472 the same. */
2474 int best_start = 0;
2475 if (!TARGET_THUMB2)
2477 int best_consecutive_zeros = 0;
2479 for (i = 0; i < 32; i += 2)
2481 int consecutive_zeros = 0;
2483 if (!(remainder & (3 << i)))
2485 while ((i < 32) && !(remainder & (3 << i)))
2487 consecutive_zeros += 2;
2488 i += 2;
2490 if (consecutive_zeros > best_consecutive_zeros)
2492 best_consecutive_zeros = consecutive_zeros;
2493 best_start = i - consecutive_zeros;
2495 i -= 2;
2499 /* So long as it won't require any more insns to do so, it's
2500 desirable to emit a small constant (in bits 0...9) in the last
2501 insn. This way there is more chance that it can be combined with
2502 a later addressing insn to form a pre-indexed load or store
2503 operation. Consider:
2505 *((volatile int *)0xe0000100) = 1;
2506 *((volatile int *)0xe0000110) = 2;
2508 We want this to wind up as:
2510 mov rA, #0xe0000000
2511 mov rB, #1
2512 str rB, [rA, #0x100]
2513 mov rB, #2
2514 str rB, [rA, #0x110]
2516 rather than having to synthesize both large constants from scratch.
2518 Therefore, we calculate how many insns would be required to emit
2519 the constant starting from `best_start', and also starting from
2520 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2521 yield a shorter sequence, we may as well use zero. */
2522 if (best_start != 0
2523 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2524 && (count_insns_for_constant (remainder, 0) <=
2525 count_insns_for_constant (remainder, best_start)))
2526 best_start = 0;
2529 /* Now start emitting the insns. */
2530 i = best_start;
2533 int end;
2535 if (i <= 0)
2536 i += 32;
2537 if (remainder & (3 << (i - 2)))
2539 end = i - 8;
2540 if (end < 0)
2541 end += 32;
2542 temp1 = remainder & ((0x0ff << end)
2543 | ((i < end) ? (0xff >> (32 - end)) : 0));
2544 remainder &= ~temp1;
2546 if (generate)
2548 rtx new_src, temp1_rtx;
2550 if (code == SET || code == MINUS)
2552 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2553 if (can_invert && code != MINUS)
2554 temp1 = ~temp1;
2556 else
2558 if (remainder && subtargets)
2559 new_src = gen_reg_rtx (mode);
2560 else
2561 new_src = target;
2562 if (can_invert)
2563 temp1 = ~temp1;
2564 else if (can_negate)
2565 temp1 = -temp1;
2568 temp1 = trunc_int_for_mode (temp1, mode);
2569 temp1_rtx = GEN_INT (temp1);
2571 if (code == SET)
2573 else if (code == MINUS)
2574 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2575 else
2576 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2578 emit_constant_insn (cond,
2579 gen_rtx_SET (VOIDmode, new_src,
2580 temp1_rtx));
2581 source = new_src;
2584 if (code == SET)
2586 can_invert = 0;
2587 code = PLUS;
2589 else if (code == MINUS)
2590 code = PLUS;
2592 insns++;
2593 if (TARGET_ARM)
2594 i -= 6;
2595 else
2596 i -= 7;
2598 /* ARM allows rotates by a multiple of two. Thumb-2 allows arbitrary
2599 shifts. */
2600 if (TARGET_ARM)
2601 i -= 2;
2602 else
2603 i--;
2605 while (remainder);
2608 return insns;
2611 /* Canonicalize a comparison so that we are more likely to recognize it.
2612 This can be done for a few constant compares, where we can make the
2613 immediate value easier to load. */
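/* For illustration (not part of the original source): a signed comparison
   'x <= 0x3ff' cannot use an immediate compare directly (0x3ff is not a
   valid ARM immediate), but it is rewritten here as 'x < 0x400', and 0x400
   is encodable. */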
2615 enum rtx_code
2616 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode,
2617 rtx * op1)
2619 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2620 unsigned HOST_WIDE_INT maxval;
2621 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
2623 switch (code)
2625 case EQ:
2626 case NE:
2627 return code;
2629 case GT:
2630 case LE:
2631 if (i != maxval
2632 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2634 *op1 = GEN_INT (i + 1);
2635 return code == GT ? GE : LT;
2637 break;
2639 case GE:
2640 case LT:
2641 if (i != ~maxval
2642 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2644 *op1 = GEN_INT (i - 1);
2645 return code == GE ? GT : LE;
2647 break;
2649 case GTU:
2650 case LEU:
2651 if (i != ~((unsigned HOST_WIDE_INT) 0)
2652 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2654 *op1 = GEN_INT (i + 1);
2655 return code == GTU ? GEU : LTU;
2657 break;
2659 case GEU:
2660 case LTU:
2661 if (i != 0
2662 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2664 *op1 = GEN_INT (i - 1);
2665 return code == GEU ? GTU : LEU;
2667 break;
2669 default:
2670 gcc_unreachable ();
2673 return code;
2677 /* Define how to find the value returned by a function. */
2680 arm_function_value(const_tree type, const_tree func ATTRIBUTE_UNUSED)
2682 enum machine_mode mode;
2683 int unsignedp ATTRIBUTE_UNUSED;
2684 rtx r ATTRIBUTE_UNUSED;
2686 mode = TYPE_MODE (type);
2687 /* Promote integer types. */
2688 if (INTEGRAL_TYPE_P (type))
2689 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2691 /* Promotes small structs returned in a register to full-word size
2692 for big-endian AAPCS. */
2693 if (arm_return_in_msb (type))
2695 HOST_WIDE_INT size = int_size_in_bytes (type);
2696 if (size % UNITS_PER_WORD != 0)
2698 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2699 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2703 return LIBCALL_VALUE(mode);
2706 /* Determine the amount of memory needed to store the possible return
2707 registers of an untyped call. */
2709 arm_apply_result_size (void)
2711 int size = 16;
2713 if (TARGET_ARM)
2715 if (TARGET_HARD_FLOAT_ABI)
2717 if (TARGET_FPA)
2718 size += 12;
2719 if (TARGET_MAVERICK)
2720 size += 8;
2722 if (TARGET_IWMMXT_ABI)
2723 size += 8;
2726 return size;
2729 /* Decide whether a type should be returned in memory (true)
2730 or in a register (false). This is called by the macro
2731 RETURN_IN_MEMORY. */
2733 arm_return_in_memory (const_tree type)
2735 HOST_WIDE_INT size;
2737 size = int_size_in_bytes (type);
2739 /* Vector values should be returned using ARM registers, not memory (unless
2740 they're over 16 bytes, which will break since we only have four
2741 call-clobbered registers to play with). */
2742 if (TREE_CODE (type) == VECTOR_TYPE)
2743 return (size < 0 || size > (4 * UNITS_PER_WORD));
2745 if (!AGGREGATE_TYPE_P (type) &&
2746 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2747 /* All simple types are returned in registers.
2748 For AAPCS, complex types are treated the same as aggregates. */
2749 return 0;
2751 if (arm_abi != ARM_ABI_APCS)
2753 /* ATPCS and later return aggregate types in memory only if they are
2754 larger than a word (or are variable size). */
2755 return (size < 0 || size > UNITS_PER_WORD);
2758 /* For the arm-wince targets we choose to be compatible with Microsoft's
2759 ARM and Thumb compilers, which always return aggregates in memory. */
2760 #ifndef ARM_WINCE
2761 /* All structures/unions bigger than one word are returned in memory.
2762 Also catch the case where int_size_in_bytes returns -1. In this case
2763 the aggregate is either huge or of variable size, and in either case
2764 we will want to return it via memory and not in a register. */
2765 if (size < 0 || size > UNITS_PER_WORD)
2766 return 1;
2768 if (TREE_CODE (type) == RECORD_TYPE)
2770 tree field;
2772 /* For a struct the APCS says that we only return in a register
2773 if the type is 'integer like' and every addressable element
2774 has an offset of zero. For practical purposes this means
2775 that the structure can have at most one non bit-field element
2776 and that this element must be the first one in the structure. */
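/* For illustration (not part of the original source): under the APCS,
   'struct { int x; }' is returned in r0, while 'struct { float f; }' of the
   same size is returned in memory because its first field is a float. */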
2778 /* Find the first field, ignoring non FIELD_DECL things which will
2779 have been created by C++. */
2780 for (field = TYPE_FIELDS (type);
2781 field && TREE_CODE (field) != FIELD_DECL;
2782 field = TREE_CHAIN (field))
2783 continue;
2785 if (field == NULL)
2786 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2788 /* Check that the first field is valid for returning in a register. */
2790 /* ... Floats are not allowed */
2791 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2792 return 1;
2794 /* ... Aggregates that are not themselves valid for returning in
2795 a register are not allowed. */
2796 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2797 return 1;
2799 /* Now check the remaining fields, if any. Only bitfields are allowed,
2800 since they are not addressable. */
2801 for (field = TREE_CHAIN (field);
2802 field;
2803 field = TREE_CHAIN (field))
2805 if (TREE_CODE (field) != FIELD_DECL)
2806 continue;
2808 if (!DECL_BIT_FIELD_TYPE (field))
2809 return 1;
2812 return 0;
2815 if (TREE_CODE (type) == UNION_TYPE)
2817 tree field;
2819 /* Unions can be returned in registers if every element is
2820 integral, or can be returned in an integer register. */
2821 for (field = TYPE_FIELDS (type);
2822 field;
2823 field = TREE_CHAIN (field))
2825 if (TREE_CODE (field) != FIELD_DECL)
2826 continue;
2828 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2829 return 1;
2831 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2832 return 1;
2835 return 0;
2837 #endif /* not ARM_WINCE */
2839 /* Return all other types in memory. */
2840 return 1;
2843 /* Indicate whether or not words of a double are in big-endian order. */
2846 arm_float_words_big_endian (void)
2848 if (TARGET_MAVERICK)
2849 return 0;
2851 /* For FPA, float words are always big-endian. For VFP, float words
2852 follow the memory system mode. */
2854 if (TARGET_FPA)
2856 return 1;
2859 if (TARGET_VFP)
2860 return (TARGET_BIG_END ? 1 : 0);
2862 return 1;
2865 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2866 for a call to a function whose data type is FNTYPE.
2867 For a library call, FNTYPE is NULL. */
2868 void
2869 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2870 rtx libname ATTRIBUTE_UNUSED,
2871 tree fndecl ATTRIBUTE_UNUSED)
2873 /* On the ARM, the offset starts at 0. */
2874 pcum->nregs = 0;
2875 pcum->iwmmxt_nregs = 0;
2876 pcum->can_split = true;
2878 /* Varargs vectors are treated the same as long long.
2879 named_count avoids having to change the way ARM handles 'named'. */
2880 pcum->named_count = 0;
2881 pcum->nargs = 0;
2883 if (TARGET_REALLY_IWMMXT && fntype)
2885 tree fn_arg;
2887 for (fn_arg = TYPE_ARG_TYPES (fntype);
2888 fn_arg;
2889 fn_arg = TREE_CHAIN (fn_arg))
2890 pcum->named_count += 1;
2892 if (! pcum->named_count)
2893 pcum->named_count = INT_MAX;
2898 /* Return true if mode/type need doubleword alignment. */
2899 bool
2900 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2902 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2903 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2907 /* Determine where to put an argument to a function.
2908 Value is zero to push the argument on the stack,
2909 or a hard register in which to store the argument.
2911 MODE is the argument's machine mode.
2912 TYPE is the data type of the argument (as a tree).
2913 This is null for libcalls where that information may
2914 not be available.
2915 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2916 the preceding args and about the function being called.
2917 NAMED is nonzero if this argument is a named parameter
2918 (otherwise it is an extra parameter matching an ellipsis). */
2921 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2922 tree type, int named)
2924 int nregs;
2926 /* Varargs vectors are treated the same as long long.
2927 named_count avoids having to change the way ARM handles 'named'. */
2928 if (TARGET_IWMMXT_ABI
2929 && arm_vector_mode_supported_p (mode)
2930 && pcum->named_count > pcum->nargs + 1)
2932 if (pcum->iwmmxt_nregs <= 9)
2933 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2934 else
2936 pcum->can_split = false;
2937 return NULL_RTX;
2941 /* Put doubleword aligned quantities in even register pairs. */
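/* For illustration (not part of the original source): under the AAPCS, once
   a single 'int' argument has taken r0, a following 'long long' skips r1 and
   is passed in the r2/r3 pair. */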
2942 if (pcum->nregs & 1
2943 && ARM_DOUBLEWORD_ALIGN
2944 && arm_needs_doubleword_align (mode, type))
2945 pcum->nregs++;
2947 if (mode == VOIDmode)
2948 /* Pick an arbitrary value for operand 2 of the call insn. */
2949 return const0_rtx;
2951 /* Only allow splitting an arg between regs and memory if all preceding
2952 args were allocated to regs. For args passed by reference we only count
2953 the reference pointer. */
2954 if (pcum->can_split)
2955 nregs = 1;
2956 else
2957 nregs = ARM_NUM_REGS2 (mode, type);
2959 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2960 return NULL_RTX;
2962 return gen_rtx_REG (mode, pcum->nregs);
2965 static int
2966 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2967 tree type, bool named ATTRIBUTE_UNUSED)
2969 int nregs = pcum->nregs;
2971 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
2972 return 0;
2974 if (NUM_ARG_REGS > nregs
2975 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2976 && pcum->can_split)
2977 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2979 return 0;
2982 /* Variable sized types are passed by reference. This is a GCC
2983 extension to the ARM ABI. */
2985 static bool
2986 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2987 enum machine_mode mode ATTRIBUTE_UNUSED,
2988 const_tree type, bool named ATTRIBUTE_UNUSED)
2990 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2993 /* Encode the current state of the #pragma [no_]long_calls. */
2994 typedef enum
2996 OFF, /* No #pragma [no_]long_calls is in effect. */
2997 LONG, /* #pragma long_calls is in effect. */
2998 SHORT /* #pragma no_long_calls is in effect. */
2999 } arm_pragma_enum;
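/* For illustration (not part of the original source): these states are
   selected in source code with '#pragma long_calls', '#pragma no_long_calls'
   and '#pragma long_calls_off', which invoke the three handlers below. */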
3001 static arm_pragma_enum arm_pragma_long_calls = OFF;
3003 void
3004 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3006 arm_pragma_long_calls = LONG;
3009 void
3010 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3012 arm_pragma_long_calls = SHORT;
3015 void
3016 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
3018 arm_pragma_long_calls = OFF;
3021 /* Table of machine attributes. */
3022 const struct attribute_spec arm_attribute_table[] =
3024 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
3025 /* Function calls made to this symbol must be done indirectly, because
3026 it may lie outside of the 26 bit addressing range of a normal function
3027 call. */
3028 { "long_call", 0, 0, false, true, true, NULL },
3029 /* Whereas these functions are always known to reside within the 26 bit
3030 addressing range. */
3031 { "short_call", 0, 0, false, true, true, NULL },
3032 /* Interrupt Service Routines have special prologue and epilogue requirements. */
3033 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
3034 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
3035 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3036 #ifdef ARM_PE
3037 /* ARM/PE has three new attributes:
3038 interfacearm - ?
3039 dllexport - for exporting a function/variable that will live in a dll
3040 dllimport - for importing a function/variable from a dll
3042 Microsoft allows multiple declspecs in one __declspec, separating
3043 them with spaces. We do NOT support this. Instead, use __declspec
3044 multiple times.
3046 { "dllimport", 0, 0, true, false, false, NULL },
3047 { "dllexport", 0, 0, true, false, false, NULL },
3048 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
3049 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
3050 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
3051 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
3052 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
3053 #endif
3054 { NULL, 0, 0, false, false, false, NULL }
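/* For illustration (not part of the original source): typical uses are
   'void fn (void) __attribute__ ((long_call));' to force an indirect call
   sequence, and 'short_call' to override -mlong-calls for one function. */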
3057 /* Handle an attribute requiring a FUNCTION_DECL;
3058 arguments as in struct attribute_spec.handler. */
3059 static tree
3060 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
3061 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
3063 if (TREE_CODE (*node) != FUNCTION_DECL)
3065 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3066 IDENTIFIER_POINTER (name));
3067 *no_add_attrs = true;
3070 return NULL_TREE;
3073 /* Handle an "interrupt" or "isr" attribute;
3074 arguments as in struct attribute_spec.handler. */
3075 static tree
3076 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
3077 bool *no_add_attrs)
3079 if (DECL_P (*node))
3081 if (TREE_CODE (*node) != FUNCTION_DECL)
3083 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3084 IDENTIFIER_POINTER (name));
3085 *no_add_attrs = true;
3087 /* FIXME: the argument if any is checked for type attributes;
3088 should it be checked for decl ones? */
3090 else
3092 if (TREE_CODE (*node) == FUNCTION_TYPE
3093 || TREE_CODE (*node) == METHOD_TYPE)
3095 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
3097 warning (OPT_Wattributes, "%qs attribute ignored",
3098 IDENTIFIER_POINTER (name));
3099 *no_add_attrs = true;
3102 else if (TREE_CODE (*node) == POINTER_TYPE
3103 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
3104 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
3105 && arm_isr_value (args) != ARM_FT_UNKNOWN)
3107 *node = build_variant_type_copy (*node);
3108 TREE_TYPE (*node) = build_type_attribute_variant
3109 (TREE_TYPE (*node),
3110 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
3111 *no_add_attrs = true;
3113 else
3115 /* Possibly pass this attribute on from the type to a decl. */
3116 if (flags & ((int) ATTR_FLAG_DECL_NEXT
3117 | (int) ATTR_FLAG_FUNCTION_NEXT
3118 | (int) ATTR_FLAG_ARRAY_NEXT))
3120 *no_add_attrs = true;
3121 return tree_cons (name, args, NULL_TREE);
3123 else
3125 warning (OPT_Wattributes, "%qs attribute ignored",
3126 IDENTIFIER_POINTER (name));
3131 return NULL_TREE;
3134 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
3135 /* Handle the "notshared" attribute. This attribute is another way of
3136 requesting hidden visibility. ARM's compiler supports
3137 "__declspec(notshared)"; we support the same thing via an
3138 attribute. */
3140 static tree
3141 arm_handle_notshared_attribute (tree *node,
3142 tree name ATTRIBUTE_UNUSED,
3143 tree args ATTRIBUTE_UNUSED,
3144 int flags ATTRIBUTE_UNUSED,
3145 bool *no_add_attrs)
3147 tree decl = TYPE_NAME (*node);
3149 if (decl)
3151 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
3152 DECL_VISIBILITY_SPECIFIED (decl) = 1;
3153 *no_add_attrs = false;
3155 return NULL_TREE;
3157 #endif
3159 /* Return 0 if the attributes for two types are incompatible, 1 if they
3160 are compatible, and 2 if they are nearly compatible (which causes a
3161 warning to be generated). */
3162 static int
3163 arm_comp_type_attributes (const_tree type1, const_tree type2)
3165 int l1, l2, s1, s2;
3167 /* Check for mismatch of non-default calling convention. */
3168 if (TREE_CODE (type1) != FUNCTION_TYPE)
3169 return 1;
3171 /* Check for mismatched call attributes. */
3172 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
3173 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
3174 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
3175 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
3177 /* Only bother to check if an attribute is defined. */
3178 if (l1 | l2 | s1 | s2)
3180 /* If one type has an attribute, the other must have the same attribute. */
3181 if ((l1 != l2) || (s1 != s2))
3182 return 0;
3184 /* Disallow mixed attributes. */
3185 if ((l1 & s2) || (l2 & s1))
3186 return 0;
3189 /* Check for mismatched ISR attribute. */
3190 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
3191 if (! l1)
3192 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
3193 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
3194 if (! l2)
3195 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
3196 if (l1 != l2)
3197 return 0;
3199 return 1;
3202 /* Assign default attributes to a newly defined type. This is used to
3203 set short_call/long_call attributes for function types of
3204 functions defined inside corresponding #pragma scopes. */
3205 static void
3206 arm_set_default_type_attributes (tree type)
3208 /* Add __attribute__ ((long_call)) to all functions when inside
3209 #pragma long_calls, or __attribute__ ((short_call)) when inside
3210 #pragma no_long_calls. */
3211 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
3213 tree type_attr_list, attr_name;
3214 type_attr_list = TYPE_ATTRIBUTES (type);
3216 if (arm_pragma_long_calls == LONG)
3217 attr_name = get_identifier ("long_call");
3218 else if (arm_pragma_long_calls == SHORT)
3219 attr_name = get_identifier ("short_call");
3220 else
3221 return;
3223 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
3224 TYPE_ATTRIBUTES (type) = type_attr_list;
3228 /* Return true if DECL is known to be linked into section SECTION. */
3230 static bool
3231 arm_function_in_section_p (tree decl, section *section)
3233 /* We can only be certain about functions defined in the same
3234 compilation unit. */
3235 if (!TREE_STATIC (decl))
3236 return false;
3238 /* Make sure that SYMBOL always binds to the definition in this
3239 compilation unit. */
3240 if (!targetm.binds_local_p (decl))
3241 return false;
3243 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
3244 if (!DECL_SECTION_NAME (decl))
3246 /* Only cater for unit-at-a-time mode, where we know that the user
3247 cannot later specify a section for DECL. */
3248 if (!flag_unit_at_a_time)
3249 return false;
3251 /* Make sure that we will not create a unique section for DECL. */
3252 if (flag_function_sections || DECL_ONE_ONLY (decl))
3253 return false;
3256 return function_section (decl) == section;
3259 /* Return nonzero if a 32-bit "long_call" should be generated for
3260 a call from the current function to DECL. We generate a long_call
3261 if the function:
3263 a. has an __attribute__ ((long_call))
3264 or b. is within the scope of a #pragma long_calls
3265 or c. the -mlong-calls command line switch has been specified
3267 However we do not generate a long call if the function:
3269 d. has an __attribute__ ((short_call))
3270 or e. is inside the scope of a #pragma no_long_calls
3271 or f. is defined in the same section as the current function. */
3273 bool
3274 arm_is_long_call_p (tree decl)
3276 tree attrs;
3278 if (!decl)
3279 return TARGET_LONG_CALLS;
3281 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
3282 if (lookup_attribute ("short_call", attrs))
3283 return false;
3285 /* For "f", be conservative, and only cater for cases in which the
3286 whole of the current function is placed in the same section. */
3287 if (!flag_reorder_blocks_and_partition
3288 && arm_function_in_section_p (decl, current_function_section ()))
3289 return false;
3291 if (lookup_attribute ("long_call", attrs))
3292 return true;
3294 return TARGET_LONG_CALLS;
3297 /* Return nonzero if it is ok to make a tail-call to DECL. */
3298 static bool
3299 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3301 unsigned long func_type;
3303 if (cfun->machine->sibcall_blocked)
3304 return false;
3306 /* Never tailcall something for which we have no decl, or if we
3307 are in Thumb mode. */
3308 if (decl == NULL || TARGET_THUMB)
3309 return false;
3311 /* The PIC register is live on entry to VxWorks PLT entries, so we
3312 must make the call before restoring the PIC register. */
3313 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
3314 return false;
3316 /* Cannot tail-call to long calls, since these are out of range of
3317 a branch instruction. */
3318 if (arm_is_long_call_p (decl))
3319 return false;
3321 /* If we are interworking and the function is not declared static
3322 then we can't tail-call it unless we know that it exists in this
3323 compilation unit (since it might be a Thumb routine). */
3324 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3325 return false;
3327 func_type = arm_current_func_type ();
3328 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3329 if (IS_INTERRUPT (func_type))
3330 return false;
3332 /* Never tailcall if function may be called with a misaligned SP. */
3333 if (IS_STACKALIGN (func_type))
3334 return false;
3336 /* Everything else is ok. */
3337 return true;
3341 /* Addressing mode support functions. */
3343 /* Return nonzero if X is a legitimate immediate operand when compiling
3344 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
3346 legitimate_pic_operand_p (rtx x)
3348 if (GET_CODE (x) == SYMBOL_REF
3349 || (GET_CODE (x) == CONST
3350 && GET_CODE (XEXP (x, 0)) == PLUS
3351 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
3352 return 0;
3354 return 1;
3357 /* Record that the current function needs a PIC register. Initialize
3358 cfun->machine->pic_reg if we have not already done so. */
3360 static void
3361 require_pic_register (void)
3363 /* A lot of the logic here is made obscure by the fact that this
3364 routine gets called as part of the rtx cost estimation process.
3365 We don't want those calls to affect any assumptions about the real
3366 function; and further, we can't call entry_of_function() until we
3367 start the real expansion process. */
3368 if (!current_function_uses_pic_offset_table)
3370 gcc_assert (can_create_pseudo_p ());
3371 if (arm_pic_register != INVALID_REGNUM)
3373 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
3375 /* Play games to avoid marking the function as needing pic
3376 if we are being called as part of the cost-estimation
3377 process. */
3378 if (current_ir_type () != IR_GIMPLE)
3379 current_function_uses_pic_offset_table = 1;
3381 else
3383 rtx seq;
3385 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
3387 /* Play games to avoid marking the function as needing pic
3388 if we are being called as part of the cost-estimation
3389 process. */
3390 if (current_ir_type () != IR_GIMPLE)
3392 current_function_uses_pic_offset_table = 1;
3393 start_sequence ();
3395 arm_load_pic_register (0UL);
3397 seq = get_insns ();
3398 end_sequence ();
3399 emit_insn_after (seq, entry_of_function ());
3406 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3408 if (GET_CODE (orig) == SYMBOL_REF
3409 || GET_CODE (orig) == LABEL_REF)
3411 rtx pic_ref, address;
3412 rtx insn;
3413 int subregs = 0;
3415 /* If this function doesn't have a pic register, create one now. */
3416 require_pic_register ();
3418 if (reg == 0)
3420 gcc_assert (can_create_pseudo_p ());
3421 reg = gen_reg_rtx (Pmode);
3423 subregs = 1;
3426 if (subregs)
3427 address = gen_reg_rtx (Pmode);
3428 else
3429 address = reg;
3431 if (TARGET_ARM)
3432 emit_insn (gen_pic_load_addr_arm (address, orig));
3433 else if (TARGET_THUMB2)
3434 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
3435 else /* TARGET_THUMB1 */
3436 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
3438 /* VxWorks does not impose a fixed gap between segments; the run-time
3439 gap can be different from the object-file gap. We therefore can't
3440 use GOTOFF unless we are absolutely sure that the symbol is in the
3441 same segment as the GOT. Unfortunately, the flexibility of linker
3442 scripts means that we can't be sure of that in general, so assume
3443 that GOTOFF is never valid on VxWorks. */
3444 if ((GET_CODE (orig) == LABEL_REF
3445 || (GET_CODE (orig) == SYMBOL_REF &&
3446 SYMBOL_REF_LOCAL_P (orig)))
3447 && NEED_GOT_RELOC
3448 && !TARGET_VXWORKS_RTP)
3449 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address);
3450 else
3452 pic_ref = gen_const_mem (Pmode,
3453 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
3454 address));
3457 insn = emit_move_insn (reg, pic_ref);
3459 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3460 by the loop optimizer. */
3461 set_unique_reg_note (insn, REG_EQUAL, orig);
3463 return reg;
3465 else if (GET_CODE (orig) == CONST)
3467 rtx base, offset;
3469 if (GET_CODE (XEXP (orig, 0)) == PLUS
3470 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
3471 return orig;
3473 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
3474 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
3475 return orig;
3477 if (reg == 0)
3479 gcc_assert (can_create_pseudo_p ());
3480 reg = gen_reg_rtx (Pmode);
3483 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3485 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3486 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3487 base == reg ? 0 : reg);
3489 if (GET_CODE (offset) == CONST_INT)
3491 /* The base register doesn't really matter, we only want to
3492 test the index for the appropriate mode. */
3493 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3495 gcc_assert (can_create_pseudo_p ());
3496 offset = force_reg (Pmode, offset);
3499 if (GET_CODE (offset) == CONST_INT)
3500 return plus_constant (base, INTVAL (offset));
3503 if (GET_MODE_SIZE (mode) > 4
3504 && (GET_MODE_CLASS (mode) == MODE_INT
3505 || TARGET_SOFT_FLOAT))
3507 emit_insn (gen_addsi3 (reg, base, offset));
3508 return reg;
3511 return gen_rtx_PLUS (Pmode, base, offset);
3514 return orig;
3518 /* Find a spare register to use during the prolog of a function. */
3520 static int
3521 thumb_find_work_register (unsigned long pushed_regs_mask)
3523 int reg;
3525 /* Check the argument registers first as these are call-used. The
3526 register allocation order means that sometimes r3 might be used
3527 but earlier argument registers might not, so check them all. */
3528 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3529 if (!df_regs_ever_live_p (reg))
3530 return reg;
3532 /* Before going on to check the call-saved registers we can try a couple
3533 more ways of deducing that r3 is available. The first is when we are
3534 pushing anonymous arguments onto the stack and we have less than 4
3535 registers' worth of fixed arguments (*). In this case r3 will be part of
3536 the variable argument list and so we can be sure that it will be
3537 pushed right at the start of the function. Hence it will be available
3538 for the rest of the prologue.
3539 (*): i.e. current_function_pretend_args_size is greater than 0. */
3540 if (cfun->machine->uses_anonymous_args
3541 && current_function_pretend_args_size > 0)
3542 return LAST_ARG_REGNUM;
3544 /* The other case is when we have fixed arguments but less than 4 registers
3545 worth. In this case r3 might be used in the body of the function, but
3546 it is not being used to convey an argument into the function. In theory
3547 we could just check current_function_args_size to see how many bytes are
3548 being passed in argument registers, but it seems that it is unreliable.
3549 Sometimes it will have the value 0 when in fact arguments are being
3550 passed. (See testcase execute/20021111-1.c for an example). So we also
3551 check the args_info.nregs field. The problem with this field is
3552 that it makes no allowances for arguments that are passed to the
3553 function but which are not used. Hence we could miss an opportunity
3554 when a function has an unused argument in r3. But it is better to be
3555 safe than to be sorry. */
3556 if (! cfun->machine->uses_anonymous_args
3557 && current_function_args_size >= 0
3558 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3559 && cfun->args_info.nregs < 4)
3560 return LAST_ARG_REGNUM;
3562 /* Otherwise look for a call-saved register that is going to be pushed. */
3563 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3564 if (pushed_regs_mask & (1 << reg))
3565 return reg;
3567 if (TARGET_THUMB2)
3569 /* Thumb-2 can use high regs. */
3570 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
3571 if (pushed_regs_mask & (1 << reg))
3572 return reg;
3574 /* Something went wrong - thumb_compute_save_reg_mask()
3575 should have arranged for a suitable register to be pushed. */
3576 gcc_unreachable ();
3579 static GTY(()) int pic_labelno;
3581 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3582 low register. */
3584 void
3585 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
3587 rtx l1, labelno, pic_tmp, pic_tmp2, pic_rtx, pic_reg;
3588 rtx global_offset_table;
3590 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3591 return;
3593 gcc_assert (flag_pic);
3595 pic_reg = cfun->machine->pic_reg;
3596 if (TARGET_VXWORKS_RTP)
3598 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
3599 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3600 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3602 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
3604 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
3605 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
3607 else
3609 /* We use an UNSPEC rather than a LABEL_REF because this label
3610 never appears in the code stream. */
3612 labelno = GEN_INT (pic_labelno++);
3613 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
3614 l1 = gen_rtx_CONST (VOIDmode, l1);
3616 global_offset_table
3617 = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3618 /* On the ARM the PC register contains 'dot + 8' at the time of the
3619 addition; on the Thumb it is 'dot + 4'. */
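/* For illustration (not part of the original source): in ARM mode the
   resulting sequence is roughly
       ldr     rPIC, .Loff
     .LPICn:
       add     rPIC, pc, rPIC
   with '.Loff: .word _GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)' emitted in a
   constant pool (label names here are purely illustrative). */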
3620 pic_tmp = plus_constant (l1, TARGET_ARM ? 8 : 4);
3621 if (GOT_PCREL)
3623 pic_tmp2 = gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx);
3624 pic_tmp2 = gen_rtx_CONST (VOIDmode, pic_tmp2);
3626 else
3627 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3629 pic_rtx = gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp);
3630 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
3632 if (TARGET_ARM)
3634 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx));
3635 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
3637 else if (TARGET_THUMB2)
3639 /* Thumb-2 only allows very limited access to the PC. Calculate the
3640 address in a temporary register. */
3641 if (arm_pic_register != INVALID_REGNUM)
3643 pic_tmp = gen_rtx_REG (SImode,
3644 thumb_find_work_register (saved_regs));
3646 else
3648 gcc_assert (can_create_pseudo_p ());
3649 pic_tmp = gen_reg_rtx (Pmode);
3652 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
3653 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
3654 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
3656 else /* TARGET_THUMB1 */
3658 if (arm_pic_register != INVALID_REGNUM
3659 && REGNO (pic_reg) > LAST_LO_REGNUM)
3661 /* We will have pushed the pic register, so we should always be
3662 able to find a work register. */
3663 pic_tmp = gen_rtx_REG (SImode,
3664 thumb_find_work_register (saved_regs));
3665 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
3666 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3668 else
3669 emit_insn (gen_pic_load_addr_thumb1 (pic_reg, pic_rtx));
3670 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
3674 /* Need to emit this whether or not we obey regdecls,
3675 since setjmp/longjmp can cause life info to screw up. */
3676 emit_insn (gen_rtx_USE (VOIDmode, pic_reg));
3680 /* Return nonzero if X is valid as an ARM state addressing register. */
3681 static int
3682 arm_address_register_rtx_p (rtx x, int strict_p)
3684 int regno;
3686 if (GET_CODE (x) != REG)
3687 return 0;
3689 regno = REGNO (x);
3691 if (strict_p)
3692 return ARM_REGNO_OK_FOR_BASE_P (regno);
3694 return (regno <= LAST_ARM_REGNUM
3695 || regno >= FIRST_PSEUDO_REGISTER
3696 || regno == FRAME_POINTER_REGNUM
3697 || regno == ARG_POINTER_REGNUM);
3700 /* Return TRUE if this rtx is the difference of a symbol and a label,
3701 and will reduce to a PC-relative relocation in the object file.
3702 Expressions like this can be left alone when generating PIC, rather
3703 than forced through the GOT. */
3704 static int
3705 pcrel_constant_p (rtx x)
3707 if (GET_CODE (x) == MINUS)
3708 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
3710 return FALSE;
3713 /* Return nonzero if X is a valid ARM state address operand. */
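/* For illustration (not part of the original source): the forms accepted
   below include a plain base register, base plus immediate ('[r0, #-4]'),
   base plus an optionally shifted index register ('[r0, r1, lsl #2]'), and
   the pre/post increment and modify variants, all subject to the
   mode-dependent checks in arm_legitimate_index_p. */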
3715 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3716 int strict_p)
3718 bool use_ldrd;
3719 enum rtx_code code = GET_CODE (x);
3721 if (arm_address_register_rtx_p (x, strict_p))
3722 return 1;
3724 use_ldrd = (TARGET_LDRD
3725 && (mode == DImode
3726 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3728 if (code == POST_INC || code == PRE_DEC
3729 || ((code == PRE_INC || code == POST_DEC)
3730 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3731 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3733 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3734 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3735 && GET_CODE (XEXP (x, 1)) == PLUS
3736 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3738 rtx addend = XEXP (XEXP (x, 1), 1);
3740 /* Don't allow ldrd post increment by register because it's hard
3741 to fixup invalid register choices. */
3742 if (use_ldrd
3743 && GET_CODE (x) == POST_MODIFY
3744 && GET_CODE (addend) == REG)
3745 return 0;
3747 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3748 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3751 /* After reload constants split into minipools will have addresses
3752 from a LABEL_REF. */
3753 else if (reload_completed
3754 && (code == LABEL_REF
3755 || (code == CONST
3756 && GET_CODE (XEXP (x, 0)) == PLUS
3757 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3758 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3759 return 1;
3761 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3762 return 0;
3764 else if (code == PLUS)
3766 rtx xop0 = XEXP (x, 0);
3767 rtx xop1 = XEXP (x, 1);
3769 return ((arm_address_register_rtx_p (xop0, strict_p)
3770 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3771 || (arm_address_register_rtx_p (xop1, strict_p)
3772 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3775 #if 0
3776 /* Reload currently can't handle MINUS, so disable this for now */
3777 else if (GET_CODE (x) == MINUS)
3779 rtx xop0 = XEXP (x, 0);
3780 rtx xop1 = XEXP (x, 1);
3782 return (arm_address_register_rtx_p (xop0, strict_p)
3783 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3785 #endif
3787 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3788 && code == SYMBOL_REF
3789 && CONSTANT_POOL_ADDRESS_P (x)
3790 && ! (flag_pic
3791 && symbol_mentioned_p (get_pool_constant (x))
3792 && ! pcrel_constant_p (get_pool_constant (x))))
3793 return 1;
3795 return 0;
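/* A few concrete SImode examples of what the routine above accepts on an
   ARMv4 or later target (register numbers are arbitrary):

     (reg r1)                                        [r1]
     (post_inc (reg r1))                             [r1], #4
     (plus (reg r1) (const_int 4095))                [r1, #4095]
     (plus (reg r1) (mult (reg r2) (const_int 4)))   [r1, r2, lsl #2]

   whereas (plus (reg r1) (const_int 4096)) is rejected, because the SImode
   index range checked by arm_legitimate_index_p is +/-4095.  */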
3798 /* Return nonzero if X is a valid Thumb-2 address operand. */
3800 thumb2_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3802 bool use_ldrd;
3803 enum rtx_code code = GET_CODE (x);
3805 if (arm_address_register_rtx_p (x, strict_p))
3806 return 1;
3808 use_ldrd = (TARGET_LDRD
3809 && (mode == DImode
3810 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3812 if (code == POST_INC || code == PRE_DEC
3813 || ((code == PRE_INC || code == POST_DEC)
3814 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3815 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3817 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3818 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3819 && GET_CODE (XEXP (x, 1)) == PLUS
3820 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3822 /* Thumb-2 only has autoincrement by constant. */
3823 rtx addend = XEXP (XEXP (x, 1), 1);
3824 HOST_WIDE_INT offset;
3826 if (GET_CODE (addend) != CONST_INT)
3827 return 0;
3829 offset = INTVAL(addend);
3830 if (GET_MODE_SIZE (mode) <= 4)
3831 return (offset > -256 && offset < 256);
3833 return (use_ldrd && offset > -1024 && offset < 1024
3834 && (offset & 3) == 0);
3837 /* After reload constants split into minipools will have addresses
3838 from a LABEL_REF. */
3839 else if (reload_completed
3840 && (code == LABEL_REF
3841 || (code == CONST
3842 && GET_CODE (XEXP (x, 0)) == PLUS
3843 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3844 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3845 return 1;
3847 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
3848 return 0;
3850 else if (code == PLUS)
3852 rtx xop0 = XEXP (x, 0);
3853 rtx xop1 = XEXP (x, 1);
3855 return ((arm_address_register_rtx_p (xop0, strict_p)
3856 && thumb2_legitimate_index_p (mode, xop1, strict_p))
3857 || (arm_address_register_rtx_p (xop1, strict_p)
3858 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
3861 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3862 && code == SYMBOL_REF
3863 && CONSTANT_POOL_ADDRESS_P (x)
3864 && ! (flag_pic
3865 && symbol_mentioned_p (get_pool_constant (x))
3866 && ! pcrel_constant_p (get_pool_constant (x))))
3867 return 1;
3869 return 0;
3872 /* Return nonzero if INDEX is valid for an address index operand in
3873 ARM state. */
3874 static int
3875 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3876 int strict_p)
3878 HOST_WIDE_INT range;
3879 enum rtx_code code = GET_CODE (index);
3881 /* Standard coprocessor addressing modes. */
3882 if (TARGET_HARD_FLOAT
3883 && (TARGET_FPA || TARGET_MAVERICK)
3884 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3885 || (TARGET_MAVERICK && mode == DImode)))
3886 return (code == CONST_INT && INTVAL (index) < 1024
3887 && INTVAL (index) > -1024
3888 && (INTVAL (index) & 3) == 0);
3890 if (TARGET_NEON
3891 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
3892 return (code == CONST_INT
3893 && INTVAL (index) < 1016
3894 && INTVAL (index) > -1024
3895 && (INTVAL (index) & 3) == 0);
3897 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3898 return (code == CONST_INT
3899 && INTVAL (index) < 1024
3900 && INTVAL (index) > -1024
3901 && (INTVAL (index) & 3) == 0);
3903 if (arm_address_register_rtx_p (index, strict_p)
3904 && (GET_MODE_SIZE (mode) <= 4))
3905 return 1;
3907 if (mode == DImode || mode == DFmode)
3909 if (code == CONST_INT)
3911 HOST_WIDE_INT val = INTVAL (index);
3913 if (TARGET_LDRD)
3914 return val > -256 && val < 256;
3915 else
3916 return val > -4096 && val < 4092;
3919 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3922 if (GET_MODE_SIZE (mode) <= 4
3923 && ! (arm_arch4
3924 && (mode == HImode
3925 || (mode == QImode && outer == SIGN_EXTEND))))
3927 if (code == MULT)
3929 rtx xiop0 = XEXP (index, 0);
3930 rtx xiop1 = XEXP (index, 1);
3932 return ((arm_address_register_rtx_p (xiop0, strict_p)
3933 && power_of_two_operand (xiop1, SImode))
3934 || (arm_address_register_rtx_p (xiop1, strict_p)
3935 && power_of_two_operand (xiop0, SImode)));
3937 else if (code == LSHIFTRT || code == ASHIFTRT
3938 || code == ASHIFT || code == ROTATERT)
3940 rtx op = XEXP (index, 1);
3942 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3943 && GET_CODE (op) == CONST_INT
3944 && INTVAL (op) > 0
3945 && INTVAL (op) <= 31);
3949 /* For ARM v4 we may be doing a sign-extend operation during the
3950 load. */
3951 if (arm_arch4)
3953 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3954 range = 256;
3955 else
3956 range = 4096;
3958 else
3959 range = (mode == HImode) ? 4095 : 4096;
3961 return (code == CONST_INT
3962 && INTVAL (index) < range
3963 && INTVAL (index) > -range);
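/* To summarize the constant-offset ranges computed above for an ARMv4 or
   later core: word and unsigned-byte accesses (SImode, QImode) accept
   offsets in (-4096, 4096); halfword and sign-extended byte accesses
   (HImode, QImode under SIGN_EXTEND) are limited to (-256, 256); and
   DImode/DFmode accesses use (-256, 256) when LDRD is available.  For
   instance, [r0, #300] is valid for an SImode load but not for an HImode
   one.  */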
3966 /* Return true if OP is a valid index scaling factor for Thumb-2 address
3967 index operand. i.e. 1, 2, 4 or 8. */
3968 static bool
3969 thumb2_index_mul_operand (rtx op)
3971 HOST_WIDE_INT val;
3973 if (GET_CODE(op) != CONST_INT)
3974 return false;
3976 val = INTVAL(op);
3977 return (val == 1 || val == 2 || val == 4 || val == 8);
3980 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
3981 static int
3982 thumb2_legitimate_index_p (enum machine_mode mode, rtx index, int strict_p)
3984 enum rtx_code code = GET_CODE (index);
3986 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
3987 /* Standard coprocessor addressing modes. */
3988 if (TARGET_HARD_FLOAT
3989 && (TARGET_FPA || TARGET_MAVERICK)
3990 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3991 || (TARGET_MAVERICK && mode == DImode)))
3992 return (code == CONST_INT && INTVAL (index) < 1024
3993 && INTVAL (index) > -1024
3994 && (INTVAL (index) & 3) == 0);
3996 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3998 /* For DImode assume values will usually live in core regs
3999 and only allow LDRD addressing modes. */
4000 if (!TARGET_LDRD || mode != DImode)
4001 return (code == CONST_INT
4002 && INTVAL (index) < 1024
4003 && INTVAL (index) > -1024
4004 && (INTVAL (index) & 3) == 0);
4007 if (TARGET_NEON
4008 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
4009 return (code == CONST_INT
4010 && INTVAL (index) < 1016
4011 && INTVAL (index) > -1024
4012 && (INTVAL (index) & 3) == 0);
4014 if (arm_address_register_rtx_p (index, strict_p)
4015 && (GET_MODE_SIZE (mode) <= 4))
4016 return 1;
4018 if (mode == DImode || mode == DFmode)
4020 HOST_WIDE_INT val = INTVAL (index);
4021 /* ??? Can we assume ldrd for thumb2? */
4022 /* Thumb-2 ldrd only has reg+const addressing modes. */
4023 if (code != CONST_INT)
4024 return 0;
4026 /* ldrd supports offsets of +-1020.
4027 However the ldr fallback does not. */
4028 return val > -256 && val < 256 && (val & 3) == 0;
4031 if (code == MULT)
4033 rtx xiop0 = XEXP (index, 0);
4034 rtx xiop1 = XEXP (index, 1);
4036 return ((arm_address_register_rtx_p (xiop0, strict_p)
4037 && thumb2_index_mul_operand (xiop1))
4038 || (arm_address_register_rtx_p (xiop1, strict_p)
4039 && thumb2_index_mul_operand (xiop0)));
4041 else if (code == ASHIFT)
4043 rtx op = XEXP (index, 1);
4045 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
4046 && GET_CODE (op) == CONST_INT
4047 && INTVAL (op) > 0
4048 && INTVAL (op) <= 3);
4051 return (code == CONST_INT
4052 && INTVAL (index) < 4096
4053 && INTVAL (index) > -256);
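/* Some concrete examples for the Thumb-2 rules above (register numbers are
   arbitrary): [r0, #4095] and [r0, #-255] are accepted for an SImode
   access, while [r0, #-256] is not (the final range check is open at
   -256); [r0, r1, lsl #2] is accepted via the ASHIFT clause, but
   [r0, r1, lsl #4] is rejected because the shift count must be 1..3.  */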
4056 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
4057 static int
4058 thumb1_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
4060 int regno;
4062 if (GET_CODE (x) != REG)
4063 return 0;
4065 regno = REGNO (x);
4067 if (strict_p)
4068 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
4070 return (regno <= LAST_LO_REGNUM
4071 || regno > LAST_VIRTUAL_REGISTER
4072 || regno == FRAME_POINTER_REGNUM
4073 || (GET_MODE_SIZE (mode) >= 4
4074 && (regno == STACK_POINTER_REGNUM
4075 || regno >= FIRST_PSEUDO_REGISTER
4076 || x == hard_frame_pointer_rtx
4077 || x == arg_pointer_rtx)));
4080 /* Return nonzero if x is a legitimate index register. This is the case
4081 for any base register that can access a QImode object. */
4082 inline static int
4083 thumb1_index_register_rtx_p (rtx x, int strict_p)
4085 return thumb1_base_register_rtx_p (x, QImode, strict_p);
4088 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
4090 The AP may be eliminated to either the SP or the FP, so we use the
4091 least common denominator, e.g. SImode, and offsets from 0 to 64.
4093 ??? Verify whether the above is the right approach.
4095 ??? Also, the FP may be eliminated to the SP, so perhaps that
4096 needs special handling also.
4098 ??? Look at how the mips16 port solves this problem. It probably uses
4099 better ways to solve some of these problems.
4101 Although it is not incorrect, we don't accept QImode and HImode
4102 addresses based on the frame pointer or arg pointer until the
4103 reload pass starts. This is so that eliminating such addresses
4104 into stack based ones won't produce impossible code. */
4106 thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
4108 /* ??? Not clear if this is right. Experiment. */
4109 if (GET_MODE_SIZE (mode) < 4
4110 && !(reload_in_progress || reload_completed)
4111 && (reg_mentioned_p (frame_pointer_rtx, x)
4112 || reg_mentioned_p (arg_pointer_rtx, x)
4113 || reg_mentioned_p (virtual_incoming_args_rtx, x)
4114 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
4115 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
4116 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
4117 return 0;
4119 /* Accept any base register. SP only in SImode or larger. */
4120 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
4121 return 1;
4123 /* This is PC relative data before arm_reorg runs. */
4124 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
4125 && GET_CODE (x) == SYMBOL_REF
4126 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
4127 return 1;
4129 /* This is PC relative data after arm_reorg runs. */
4130 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
4131 && (GET_CODE (x) == LABEL_REF
4132 || (GET_CODE (x) == CONST
4133 && GET_CODE (XEXP (x, 0)) == PLUS
4134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
4135 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
4136 return 1;
4138 /* Post-inc indexing only supported for SImode and larger. */
4139 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
4140 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
4141 return 1;
4143 else if (GET_CODE (x) == PLUS)
4145 /* REG+REG address can be any two index registers. */
4146 /* We disallow FRAME+REG addressing since we know that FRAME
4147 will be replaced with STACK, and SP relative addressing only
4148 permits SP+OFFSET. */
4149 if (GET_MODE_SIZE (mode) <= 4
4150 && XEXP (x, 0) != frame_pointer_rtx
4151 && XEXP (x, 1) != frame_pointer_rtx
4152 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4153 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p))
4154 return 1;
4156 /* REG+const has 5-7 bit offset for non-SP registers. */
4157 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
4158 || XEXP (x, 0) == arg_pointer_rtx)
4159 && GET_CODE (XEXP (x, 1)) == CONST_INT
4160 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4161 return 1;
4163 /* REG+const has 10-bit offset for SP, but only SImode and
4164 larger are supported. */
4165 /* ??? Should probably check for DI/DFmode overflow here
4166 just like GO_IF_LEGITIMATE_OFFSET does. */
4167 else if (GET_CODE (XEXP (x, 0)) == REG
4168 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
4169 && GET_MODE_SIZE (mode) >= 4
4170 && GET_CODE (XEXP (x, 1)) == CONST_INT
4171 && INTVAL (XEXP (x, 1)) >= 0
4172 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
4173 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4174 return 1;
4176 else if (GET_CODE (XEXP (x, 0)) == REG
4177 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
4178 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
4179 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
4180 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER))
4181 && GET_MODE_SIZE (mode) >= 4
4182 && GET_CODE (XEXP (x, 1)) == CONST_INT
4183 && (INTVAL (XEXP (x, 1)) & 3) == 0)
4184 return 1;
4187 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
4188 && GET_MODE_SIZE (mode) == 4
4189 && GET_CODE (x) == SYMBOL_REF
4190 && CONSTANT_POOL_ADDRESS_P (x)
4191 && ! (flag_pic
4192 && symbol_mentioned_p (get_pool_constant (x))
4193 && ! pcrel_constant_p (get_pool_constant (x))))
4194 return 1;
4196 return 0;
4199 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
4200 instruction of mode MODE. */
4202 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
4204 switch (GET_MODE_SIZE (mode))
4206 case 1:
4207 return val >= 0 && val < 32;
4209 case 2:
4210 return val >= 0 && val < 64 && (val & 1) == 0;
4212 default:
4213 return (val >= 0
4214 && (val + GET_MODE_SIZE (mode)) <= 128
4215 && (val & 3) == 0);
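/* A few sample values for the checks above: (QImode, 31), (HImode, 62) and
   (SImode, 124) are all accepted, while (QImode, 32), (HImode, 63) (odd
   offset) and (SImode, 128) are rejected -- 124 is the largest word
   offset, since val + 4 must not exceed 128.  */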
4219 /* Build the SYMBOL_REF for __tls_get_addr. */
4221 static GTY(()) rtx tls_get_addr_libfunc;
4223 static rtx
4224 get_tls_get_addr (void)
4226 if (!tls_get_addr_libfunc)
4227 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
4228 return tls_get_addr_libfunc;
4231 static rtx
4232 arm_load_tp (rtx target)
4234 if (!target)
4235 target = gen_reg_rtx (SImode);
4237 if (TARGET_HARD_TP)
4239 /* Can return in any reg. */
4240 emit_insn (gen_load_tp_hard (target));
4242 else
4244 /* Always returned in r0. Immediately copy the result into a pseudo,
4245 otherwise other uses of r0 (e.g. setting up function arguments) may
4246 clobber the value. */
4248 rtx tmp;
4250 emit_insn (gen_load_tp_soft ());
4252 tmp = gen_rtx_REG (SImode, 0);
4253 emit_move_insn (target, tmp);
4255 return target;
4258 static rtx
4259 load_tls_operand (rtx x, rtx reg)
4261 rtx tmp;
4263 if (reg == NULL_RTX)
4264 reg = gen_reg_rtx (SImode);
4266 tmp = gen_rtx_CONST (SImode, x);
4268 emit_move_insn (reg, tmp);
4270 return reg;
4273 static rtx
4274 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
4276 rtx insns, label, labelno, sum;
4278 start_sequence ();
4280 labelno = GEN_INT (pic_labelno++);
4281 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4282 label = gen_rtx_CONST (VOIDmode, label);
4284 sum = gen_rtx_UNSPEC (Pmode,
4285 gen_rtvec (4, x, GEN_INT (reloc), label,
4286 GEN_INT (TARGET_ARM ? 8 : 4)),
4287 UNSPEC_TLS);
4288 reg = load_tls_operand (sum, reg);
4290 if (TARGET_ARM)
4291 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
4292 else if (TARGET_THUMB2)
4294 rtx tmp;
4295 /* Thumb-2 only allows very limited access to the PC. Calculate
4296 the address in a temporary register. */
4297 tmp = gen_reg_rtx (SImode);
4298 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4299 emit_insn (gen_addsi3(reg, reg, tmp));
4301 else /* TARGET_THUMB1 */
4302 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4304 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
4305 Pmode, 1, reg, Pmode);
4307 insns = get_insns ();
4308 end_sequence ();
4310 return insns;
4314 legitimize_tls_address (rtx x, rtx reg)
4316 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
4317 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
4319 switch (model)
4321 case TLS_MODEL_GLOBAL_DYNAMIC:
4322 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
4323 dest = gen_reg_rtx (Pmode);
4324 emit_libcall_block (insns, dest, ret, x);
4325 return dest;
4327 case TLS_MODEL_LOCAL_DYNAMIC:
4328 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
4330 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
4331 share the LDM result with other LD model accesses. */
4332 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
4333 UNSPEC_TLS);
4334 dest = gen_reg_rtx (Pmode);
4335 emit_libcall_block (insns, dest, ret, eqv);
4337 /* Load the addend. */
4338 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
4339 UNSPEC_TLS);
4340 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
4341 return gen_rtx_PLUS (Pmode, dest, addend);
4343 case TLS_MODEL_INITIAL_EXEC:
4344 labelno = GEN_INT (pic_labelno++);
4345 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
4346 label = gen_rtx_CONST (VOIDmode, label);
4347 sum = gen_rtx_UNSPEC (Pmode,
4348 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
4349 GEN_INT (TARGET_ARM ? 8 : 4)),
4350 UNSPEC_TLS);
4351 reg = load_tls_operand (sum, reg);
4353 if (TARGET_ARM)
4354 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
4355 else if (TARGET_THUMB2)
4357 rtx tmp;
4358 /* Thumb-2 only allows very limited access to the PC. Calculate
4359 the address in a temporary register. */
4360 tmp = gen_reg_rtx (SImode);
4361 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
4362 emit_insn (gen_addsi3(reg, reg, tmp));
4363 emit_move_insn (reg, gen_const_mem (SImode, reg));
4365 else
4367 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
4368 emit_move_insn (reg, gen_const_mem (SImode, reg));
4371 tp = arm_load_tp (NULL_RTX);
4373 return gen_rtx_PLUS (Pmode, tp, reg);
4375 case TLS_MODEL_LOCAL_EXEC:
4376 tp = arm_load_tp (NULL_RTX);
4378 reg = gen_rtx_UNSPEC (Pmode,
4379 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
4380 UNSPEC_TLS);
4381 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
4383 return gen_rtx_PLUS (Pmode, tp, reg);
4385 default:
4386 abort ();
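/* As an illustration of the shapes produced above (register names are
   arbitrary): for TLS_MODEL_LOCAL_EXEC the result is simply

     (plus (reg tp) (reg tmp))

   where tmp holds the constant (const (unspec [x, TLS_LE32] UNSPEC_TLS)),
   i.e. a link-time offset added to the thread pointer.  The INITIAL_EXEC
   case is the same except that the offset is first loaded from a
   PC-relative literal, and the two dynamic models instead call
   __tls_get_addr and use its return value.  */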
4390 /* Try machine-dependent ways of modifying an illegitimate address
4391 to be legitimate. If we find one, return the new, valid address. */
4393 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4395 if (arm_tls_symbol_p (x))
4396 return legitimize_tls_address (x, NULL_RTX);
4398 if (GET_CODE (x) == PLUS)
4400 rtx xop0 = XEXP (x, 0);
4401 rtx xop1 = XEXP (x, 1);
4403 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
4404 xop0 = force_reg (SImode, xop0);
4406 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
4407 xop1 = force_reg (SImode, xop1);
4409 if (ARM_BASE_REGISTER_RTX_P (xop0)
4410 && GET_CODE (xop1) == CONST_INT)
4412 HOST_WIDE_INT n, low_n;
4413 rtx base_reg, val;
4414 n = INTVAL (xop1);
4416 /* VFP addressing modes actually allow greater offsets, but for
4417 now we just stick with the lowest common denominator. */
4418 if (mode == DImode
4419 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
4421 low_n = n & 0x0f;
4422 n &= ~0x0f;
4423 if (low_n > 4)
4425 n += 16;
4426 low_n -= 16;
4429 else
4431 low_n = ((mode) == TImode ? 0
4432 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
4433 n -= low_n;
4436 base_reg = gen_reg_rtx (SImode);
4437 val = force_operand (plus_constant (xop0, n), NULL_RTX);
4438 emit_move_insn (base_reg, val);
4439 x = plus_constant (base_reg, low_n);
4441 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4442 x = gen_rtx_PLUS (SImode, xop0, xop1);
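/* Worked example of the splitting above: for an SImode access to
   (plus (reg r1) (const_int 0x1234)) we get low_n = 0x234 and n = 0x1000,
   so the address is rewritten as (plus (reg base) (const_int 0x234)) with
   base loaded from r1 + 0x1000; the residual offset then fits the 12-bit
   ldr/str range.  */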
4445 /* XXX We don't allow MINUS any more -- see comment in
4446 arm_legitimate_address_p (). */
4447 else if (GET_CODE (x) == MINUS)
4449 rtx xop0 = XEXP (x, 0);
4450 rtx xop1 = XEXP (x, 1);
4452 if (CONSTANT_P (xop0))
4453 xop0 = force_reg (SImode, xop0);
4455 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
4456 xop1 = force_reg (SImode, xop1);
4458 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
4459 x = gen_rtx_MINUS (SImode, xop0, xop1);
4462 /* Make sure to take full advantage of the pre-indexed addressing mode
4463 with absolute addresses which often allows for the base register to
4464 be factorized for multiple adjacent memory references, and it might
4465 even allow for the mini pool to be avoided entirely. */
4466 else if (GET_CODE (x) == CONST_INT && optimize > 0)
4468 unsigned int bits;
4469 HOST_WIDE_INT mask, base, index;
4470 rtx base_reg;
4472 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
4473 use an 8-bit index. So let's use a 12-bit index for SImode only and
4474 hope that arm_gen_constant will enable ldrb to use more bits. */
4475 bits = (mode == SImode) ? 12 : 8;
4476 mask = (1 << bits) - 1;
4477 base = INTVAL (x) & ~mask;
4478 index = INTVAL (x) & mask;
4479 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
4481 /* It'll most probably be more efficient to generate the base
4482 with more bits set and use a negative index instead. */
4483 base |= mask;
4484 index -= mask;
4486 base_reg = force_reg (SImode, GEN_INT (base));
4487 x = plus_constant (base_reg, index);
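/* Worked example of the absolute-address case above: for SImode and
   x = (const_int 0x12345) we use bits = 12, so base = 0x12000 (only two
   bits set, so the negative-index adjustment is not taken) and
   index = 0x345; the result is (plus (reg base) (const_int 0x345)) with
   base holding 0x12000, a value arm_gen_constant can build cheaply.  */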
4490 if (flag_pic)
4492 /* We need to find and carefully transform any SYMBOL and LABEL
4493 references; so go back to the original address expression. */
4494 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4496 if (new_x != orig_x)
4497 x = new_x;
4500 return x;
4504 /* Try machine-dependent ways of modifying an illegitimate Thumb address
4505 to be legitimate. If we find one, return the new, valid address. */
4507 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
4509 if (arm_tls_symbol_p (x))
4510 return legitimize_tls_address (x, NULL_RTX);
4512 if (GET_CODE (x) == PLUS
4513 && GET_CODE (XEXP (x, 1)) == CONST_INT
4514 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
4515 || INTVAL (XEXP (x, 1)) < 0))
4517 rtx xop0 = XEXP (x, 0);
4518 rtx xop1 = XEXP (x, 1);
4519 HOST_WIDE_INT offset = INTVAL (xop1);
4521 /* Try and fold the offset into a biasing of the base register and
4522 then offsetting that. Don't do this when optimizing for space
4523 since it can cause too many CSEs. */
4524 if (optimize_size && offset >= 0
4525 && offset < 256 + 31 * GET_MODE_SIZE (mode))
4527 HOST_WIDE_INT delta;
4529 if (offset >= 256)
4530 delta = offset - (256 - GET_MODE_SIZE (mode));
4531 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
4532 delta = 31 * GET_MODE_SIZE (mode);
4533 else
4534 delta = offset & (~31 * GET_MODE_SIZE (mode));
4536 xop0 = force_operand (plus_constant (xop0, offset - delta),
4537 NULL_RTX);
4538 x = plus_constant (xop0, delta);
4540 else if (offset < 0 && offset > -256)
4541 /* Small negative offsets are best done with a subtract before the
4542 dereference; forcing these into a register normally takes two
4543 instructions. */
4544 x = force_operand (x, NULL_RTX);
4545 else
4547 /* For the remaining cases, force the constant into a register. */
4548 xop1 = force_reg (SImode, xop1);
4549 x = gen_rtx_PLUS (SImode, xop0, xop1);
4552 else if (GET_CODE (x) == PLUS
4553 && s_register_operand (XEXP (x, 1), SImode)
4554 && !s_register_operand (XEXP (x, 0), SImode))
4556 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
4558 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
4561 if (flag_pic)
4563 /* We need to find and carefully transform any SYMBOL and LABEL
4564 references; so go back to the original address expression. */
4565 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
4567 if (new_x != orig_x)
4568 x = new_x;
4571 return x;
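/* Worked example of the offset folding above: for an SImode access to
   (plus (reg r2) (const_int 260)), when the biasing path is taken we get
   delta = 260 - (256 - 4) = 8, so the base is biased to r2 + 252 and the
   final address is (plus (reg tmp) (const_int 8)), both pieces being
   representable in Thumb-1 encodings.  */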
4575 thumb_legitimize_reload_address (rtx *x_p,
4576 enum machine_mode mode,
4577 int opnum, int type,
4578 int ind_levels ATTRIBUTE_UNUSED)
4580 rtx x = *x_p;
4582 if (GET_CODE (x) == PLUS
4583 && GET_MODE_SIZE (mode) < 4
4584 && REG_P (XEXP (x, 0))
4585 && XEXP (x, 0) == stack_pointer_rtx
4586 && GET_CODE (XEXP (x, 1)) == CONST_INT
4587 && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
4589 rtx orig_x = x;
4591 x = copy_rtx (x);
4592 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4593 Pmode, VOIDmode, 0, 0, opnum, type);
4594 return x;
4597 /* If both registers are hi-regs, then it's better to reload the
4598 entire expression rather than each register individually. That
4599 only requires one reload register rather than two. */
4600 if (GET_CODE (x) == PLUS
4601 && REG_P (XEXP (x, 0))
4602 && REG_P (XEXP (x, 1))
4603 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode)
4604 && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode))
4606 rtx orig_x = x;
4608 x = copy_rtx (x);
4609 push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode),
4610 Pmode, VOIDmode, 0, 0, opnum, type);
4611 return x;
4614 return NULL;
4617 /* Test for various thread-local symbols. */
4619 /* Return TRUE if X is a thread-local symbol. */
4621 static bool
4622 arm_tls_symbol_p (rtx x)
4624 if (! TARGET_HAVE_TLS)
4625 return false;
4627 if (GET_CODE (x) != SYMBOL_REF)
4628 return false;
4630 return SYMBOL_REF_TLS_MODEL (x) != 0;
4633 /* Helper for arm_tls_referenced_p. */
4635 static int
4636 arm_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
4638 if (GET_CODE (*x) == SYMBOL_REF)
4639 return SYMBOL_REF_TLS_MODEL (*x) != 0;
4641 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
4642 TLS offsets, not real symbol references. */
4643 if (GET_CODE (*x) == UNSPEC
4644 && XINT (*x, 1) == UNSPEC_TLS)
4645 return -1;
4647 return 0;
4650 /* Return TRUE if X contains any TLS symbol references. */
4652 bool
4653 arm_tls_referenced_p (rtx x)
4655 if (! TARGET_HAVE_TLS)
4656 return false;
4658 return for_each_rtx (&x, arm_tls_operand_p_1, NULL);
4661 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
4663 bool
4664 arm_cannot_force_const_mem (rtx x)
4666 rtx base, offset;
4668 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
4670 split_const (x, &base, &offset);
4671 if (GET_CODE (base) == SYMBOL_REF
4672 && !offset_within_block_p (base, INTVAL (offset)))
4673 return true;
4675 return arm_tls_referenced_p (x);
4678 #define REG_OR_SUBREG_REG(X) \
4679 (GET_CODE (X) == REG \
4680 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
4682 #define REG_OR_SUBREG_RTX(X) \
4683 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
4685 #ifndef COSTS_N_INSNS
4686 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
4687 #endif
4688 static inline int
4689 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
4691 enum machine_mode mode = GET_MODE (x);
4693 switch (code)
4695 case ASHIFT:
4696 case ASHIFTRT:
4697 case LSHIFTRT:
4698 case ROTATERT:
4699 case PLUS:
4700 case MINUS:
4701 case COMPARE:
4702 case NEG:
4703 case NOT:
4704 return COSTS_N_INSNS (1);
4706 case MULT:
4707 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4709 int cycles = 0;
4710 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
4712 while (i)
4714 i >>= 2;
4715 cycles++;
4717 return COSTS_N_INSNS (2) + cycles;
4719 return COSTS_N_INSNS (1) + 16;
4721 case SET:
4722 return (COSTS_N_INSNS (1)
4723 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
4724 + (GET_CODE (SET_DEST (x)) == MEM)));
4726 case CONST_INT:
4727 if (outer == SET)
4729 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
4730 return 0;
4731 if (thumb_shiftable_const (INTVAL (x)))
4732 return COSTS_N_INSNS (2);
4733 return COSTS_N_INSNS (3);
4735 else if ((outer == PLUS || outer == COMPARE)
4736 && INTVAL (x) < 256 && INTVAL (x) > -256)
4737 return 0;
4738 else if (outer == AND
4739 && INTVAL (x) < 256 && INTVAL (x) >= -256)
4740 return COSTS_N_INSNS (1);
4741 else if (outer == ASHIFT || outer == ASHIFTRT
4742 || outer == LSHIFTRT)
4743 return 0;
4744 return COSTS_N_INSNS (2);
4746 case CONST:
4747 case CONST_DOUBLE:
4748 case LABEL_REF:
4749 case SYMBOL_REF:
4750 return COSTS_N_INSNS (3);
4752 case UDIV:
4753 case UMOD:
4754 case DIV:
4755 case MOD:
4756 return 100;
4758 case TRUNCATE:
4759 return 99;
4761 case AND:
4762 case XOR:
4763 case IOR:
4764 /* XXX guess. */
4765 return 8;
4767 case MEM:
4768 /* XXX another guess. */
4769 /* Memory costs quite a lot for the first word, but subsequent words
4770 load at the equivalent of a single insn each. */
4771 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4772 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
4773 ? 4 : 0));
4775 case IF_THEN_ELSE:
4776 /* XXX a guess. */
4777 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4778 return 14;
4779 return 2;
4781 case ZERO_EXTEND:
4782 /* XXX still guessing. */
4783 switch (GET_MODE (XEXP (x, 0)))
4785 case QImode:
4786 return (1 + (mode == DImode ? 4 : 0)
4787 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4789 case HImode:
4790 return (4 + (mode == DImode ? 4 : 0)
4791 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4793 case SImode:
4794 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4796 default:
4797 return 99;
4800 default:
4801 return 99;
4806 /* Worker routine for arm_rtx_costs. */
4807 /* ??? This needs updating for thumb2. */
4808 static inline int
4809 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
4811 enum machine_mode mode = GET_MODE (x);
4812 enum rtx_code subcode;
4813 int extra_cost;
4815 switch (code)
4817 case MEM:
4818 /* Memory costs quite a lot for the first word, but subsequent words
4819 load at the equivalent of a single insn each. */
4820 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4821 + (GET_CODE (x) == SYMBOL_REF
4822 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4824 case DIV:
4825 case MOD:
4826 case UDIV:
4827 case UMOD:
4828 return optimize_size ? COSTS_N_INSNS (2) : 100;
4830 case ROTATE:
4831 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4832 return 4;
4833 /* Fall through */
4834 case ROTATERT:
4835 if (mode != SImode)
4836 return 8;
4837 /* Fall through */
4838 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4839 if (mode == DImode)
4840 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4841 + ((GET_CODE (XEXP (x, 0)) == REG
4842 || (GET_CODE (XEXP (x, 0)) == SUBREG
4843 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4844 ? 0 : 8));
4845 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4846 || (GET_CODE (XEXP (x, 0)) == SUBREG
4847 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4848 ? 0 : 4)
4849 + ((GET_CODE (XEXP (x, 1)) == REG
4850 || (GET_CODE (XEXP (x, 1)) == SUBREG
4851 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4852 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4853 ? 0 : 4));
4855 case MINUS:
4856 if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
4858 extra_cost = rtx_cost (XEXP (x, 1), code);
4859 if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
4860 extra_cost += 4 * ARM_NUM_REGS (mode);
4861 return extra_cost;
4864 if (mode == DImode)
4865 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4866 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4867 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4868 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4869 ? 0 : 8));
4871 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4872 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4873 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4874 && arm_const_double_rtx (XEXP (x, 1))))
4875 ? 0 : 8)
4876 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4877 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4878 && arm_const_double_rtx (XEXP (x, 0))))
4879 ? 0 : 8));
4881 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4882 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4883 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4884 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4885 || subcode == ASHIFTRT || subcode == LSHIFTRT
4886 || subcode == ROTATE || subcode == ROTATERT
4887 || (subcode == MULT
4888 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4889 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4890 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4891 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4892 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4893 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4894 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4895 return 1;
4896 /* Fall through */
4898 case PLUS:
4899 if (GET_CODE (XEXP (x, 0)) == MULT)
4901 extra_cost = rtx_cost (XEXP (x, 0), code);
4902 if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
4903 extra_cost += 4 * ARM_NUM_REGS (mode);
4904 return extra_cost;
4907 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4908 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4909 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4910 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4911 && arm_const_double_rtx (XEXP (x, 1))))
4912 ? 0 : 8));
4914 /* Fall through */
4915 case AND: case XOR: case IOR:
4916 extra_cost = 0;
4918 /* Normally the frame registers will be split into reg+const during
4919 reload, so it is a bad idea to combine them with other instructions,
4920 since then they might not be moved outside of loops. As a compromise
4921 we allow integration with ops that have a constant as their second
4922 operand. */
4923 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4924 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4925 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4926 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4927 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4928 extra_cost = 4;
4930 if (mode == DImode)
4931 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4932 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4933 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4934 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4935 ? 0 : 8));
4937 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4938 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4939 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4940 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4941 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4942 ? 0 : 4));
4944 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4945 return (1 + extra_cost
4946 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4947 || subcode == LSHIFTRT || subcode == ASHIFTRT
4948 || subcode == ROTATE || subcode == ROTATERT
4949 || (subcode == MULT
4950 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4951 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4952 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4953 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4954 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4955 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4956 ? 0 : 4));
4958 return 8;
4960 case MULT:
4961 /* This should have been handled by the CPU specific routines. */
4962 gcc_unreachable ();
4964 case TRUNCATE:
4965 if (arm_arch3m && mode == SImode
4966 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4967 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4968 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4969 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4970 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4971 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4972 return 8;
4973 return 99;
4975 case NEG:
4976 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4977 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4978 /* Fall through */
4979 case NOT:
4980 if (mode == DImode)
4981 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4983 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4985 case IF_THEN_ELSE:
4986 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4987 return 14;
4988 return 2;
4990 case COMPARE:
4991 return 1;
4993 case ABS:
4994 return 4 + (mode == DImode ? 4 : 0);
4996 case SIGN_EXTEND:
4997 /* ??? value extensions are cheaper on armv6. */
4998 if (GET_MODE (XEXP (x, 0)) == QImode)
4999 return (4 + (mode == DImode ? 4 : 0)
5000 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5001 /* Fall through */
5002 case ZERO_EXTEND:
5003 switch (GET_MODE (XEXP (x, 0)))
5005 case QImode:
5006 return (1 + (mode == DImode ? 4 : 0)
5007 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5009 case HImode:
5010 return (4 + (mode == DImode ? 4 : 0)
5011 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5013 case SImode:
5014 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
5016 case V8QImode:
5017 case V4HImode:
5018 case V2SImode:
5019 case V4QImode:
5020 case V2HImode:
5021 return 1;
5023 default:
5024 gcc_unreachable ();
5026 gcc_unreachable ();
5028 case CONST_INT:
5029 if (const_ok_for_arm (INTVAL (x)))
5030 return outer == SET ? 2 : -1;
5031 else if (outer == AND
5032 && const_ok_for_arm (~INTVAL (x)))
5033 return -1;
5034 else if ((outer == COMPARE
5035 || outer == PLUS || outer == MINUS)
5036 && const_ok_for_arm (-INTVAL (x)))
5037 return -1;
5038 else
5039 return 5;
5041 case CONST:
5042 case LABEL_REF:
5043 case SYMBOL_REF:
5044 return 6;
5046 case CONST_DOUBLE:
5047 if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
5048 return outer == SET ? 2 : -1;
5049 else if ((outer == COMPARE || outer == PLUS)
5050 && neg_const_double_rtx_ok_for_fpa (x))
5051 return -1;
5052 return 7;
5054 default:
5055 return 99;
5059 /* RTX costs when optimizing for size. */
5060 static bool
5061 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
5063 enum machine_mode mode = GET_MODE (x);
5065 if (TARGET_THUMB)
5067 /* XXX TBD. For now, use the standard costs. */
5068 *total = thumb1_rtx_costs (x, code, outer_code);
5069 return true;
5072 switch (code)
5074 case MEM:
5075 /* A memory access costs 1 insn if the mode is small, or the address is
5076 a single register; otherwise it costs one insn per word. */
5077 if (REG_P (XEXP (x, 0)))
5078 *total = COSTS_N_INSNS (1);
5079 else
5080 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5081 return true;
5083 case DIV:
5084 case MOD:
5085 case UDIV:
5086 case UMOD:
5087 /* Needs a libcall, so it costs about this. */
5088 *total = COSTS_N_INSNS (2);
5089 return false;
5091 case ROTATE:
5092 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
5094 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
5095 return true;
5097 /* Fall through */
5098 case ROTATERT:
5099 case ASHIFT:
5100 case LSHIFTRT:
5101 case ASHIFTRT:
5102 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
5104 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
5105 return true;
5107 else if (mode == SImode)
5109 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
5110 /* Slightly disparage register shifts, but not by much. */
5111 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5112 *total += 1 + rtx_cost (XEXP (x, 1), code);
5113 return true;
5116 /* Needs a libcall. */
5117 *total = COSTS_N_INSNS (2);
5118 return false;
5120 case MINUS:
5121 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5123 *total = COSTS_N_INSNS (1);
5124 return false;
5127 if (mode == SImode)
5129 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
5130 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
5132 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
5133 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
5134 || subcode1 == ROTATE || subcode1 == ROTATERT
5135 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
5136 || subcode1 == ASHIFTRT)
5138 /* It's just the cost of the two operands. */
5139 *total = 0;
5140 return false;
5143 *total = COSTS_N_INSNS (1);
5144 return false;
5147 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5148 return false;
5150 case PLUS:
5151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5153 *total = COSTS_N_INSNS (1);
5154 return false;
5157 /* Fall through */
5158 case AND: case XOR: case IOR:
5159 if (mode == SImode)
5161 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
5163 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
5164 || subcode == LSHIFTRT || subcode == ASHIFTRT
5165 || (code == AND && subcode == NOT))
5167 /* It's just the cost of the two operands. */
5168 *total = 0;
5169 return false;
5173 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5174 return false;
5176 case MULT:
5177 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5178 return false;
5180 case NEG:
5181 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5182 *total = COSTS_N_INSNS (1);
5183 /* Fall through */
5184 case NOT:
5185 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5187 return false;
5189 case IF_THEN_ELSE:
5190 *total = 0;
5191 return false;
5193 case COMPARE:
5194 if (cc_register (XEXP (x, 0), VOIDmode))
5195 *total = 0;
5196 else
5197 *total = COSTS_N_INSNS (1);
5198 return false;
5200 case ABS:
5201 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
5202 *total = COSTS_N_INSNS (1);
5203 else
5204 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
5205 return false;
5207 case SIGN_EXTEND:
5208 *total = 0;
5209 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
5211 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5212 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5214 if (mode == DImode)
5215 *total += COSTS_N_INSNS (1);
5216 return false;
5218 case ZERO_EXTEND:
5219 *total = 0;
5220 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
5222 switch (GET_MODE (XEXP (x, 0)))
5224 case QImode:
5225 *total += COSTS_N_INSNS (1);
5226 break;
5228 case HImode:
5229 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
5231 case SImode:
5232 break;
5234 default:
5235 *total += COSTS_N_INSNS (2);
5239 if (mode == DImode)
5240 *total += COSTS_N_INSNS (1);
5242 return false;
5244 case CONST_INT:
5245 if (const_ok_for_arm (INTVAL (x)))
5246 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
5247 else if (const_ok_for_arm (~INTVAL (x)))
5248 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
5249 else if (const_ok_for_arm (-INTVAL (x)))
5251 if (outer_code == COMPARE || outer_code == PLUS
5252 || outer_code == MINUS)
5253 *total = 0;
5254 else
5255 *total = COSTS_N_INSNS (1);
5257 else
5258 *total = COSTS_N_INSNS (2);
5259 return true;
5261 case CONST:
5262 case LABEL_REF:
5263 case SYMBOL_REF:
5264 *total = COSTS_N_INSNS (2);
5265 return true;
5267 case CONST_DOUBLE:
5268 *total = COSTS_N_INSNS (4);
5269 return true;
5271 default:
5272 if (mode != VOIDmode)
5273 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
5274 else
5275 *total = COSTS_N_INSNS (4); /* Who knows? */
5276 return false;
5280 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
5281 supported on any "slowmul" cores, so it can be ignored. */
5283 static bool
5284 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5286 enum machine_mode mode = GET_MODE (x);
5288 if (TARGET_THUMB)
5290 *total = thumb1_rtx_costs (x, code, outer_code);
5291 return true;
5294 switch (code)
5296 case MULT:
5297 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5298 || mode == DImode)
5300 *total = 30;
5301 return true;
5304 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5306 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5307 & (unsigned HOST_WIDE_INT) 0xffffffff);
5308 int cost, const_ok = const_ok_for_arm (i);
5309 int j, booth_unit_size;
5311 /* Tune as appropriate. */
5312 cost = const_ok ? 4 : 8;
5313 booth_unit_size = 2;
5314 for (j = 0; i && j < 32; j += booth_unit_size)
5316 i >>= booth_unit_size;
5317 cost += 2;
5320 *total = cost;
5321 return true;
5324 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5325 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5326 return true;
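/* Worked example of the loop above: multiplying by 100 (0x64, which is
   const_ok_for_arm) starts from cost = 4; with booth_unit_size == 2 the
   constant needs four shifts to reach zero (0x64 -> 0x19 -> 0x6 -> 0x1
   -> 0), giving *total = 4 + 4*2 = 12.  The "fastmul" variant below uses
   booth_unit_size == 8, so the same constant costs 4 + 2 = 6.  */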
5328 default:
5329 *total = arm_rtx_costs_1 (x, code, outer_code);
5330 return true;
5335 /* RTX cost for cores with a fast multiply unit (M variants). */
5337 static bool
5338 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
5340 enum machine_mode mode = GET_MODE (x);
5342 if (TARGET_THUMB1)
5344 *total = thumb1_rtx_costs (x, code, outer_code);
5345 return true;
5348 /* ??? should thumb2 use different costs? */
5349 switch (code)
5351 case MULT:
5352 /* There is no point basing this on the tuning, since it is always the
5353 fast variant if it exists at all. */
5354 if (mode == DImode
5355 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5356 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5357 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5359 *total = 8;
5360 return true;
5364 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5365 || mode == DImode)
5367 *total = 30;
5368 return true;
5371 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5373 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5374 & (unsigned HOST_WIDE_INT) 0xffffffff);
5375 int cost, const_ok = const_ok_for_arm (i);
5376 int j, booth_unit_size;
5378 /* Tune as appropriate. */
5379 cost = const_ok ? 4 : 8;
5380 booth_unit_size = 8;
5381 for (j = 0; i && j < 32; j += booth_unit_size)
5383 i >>= booth_unit_size;
5384 cost += 2;
5387 *total = cost;
5388 return true;
5391 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5392 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5393 return true;
5395 default:
5396 *total = arm_rtx_costs_1 (x, code, outer_code);
5397 return true;
5402 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
5403 so it can be ignored. */
5405 static bool
5406 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
5408 enum machine_mode mode = GET_MODE (x);
5410 if (TARGET_THUMB)
5412 *total = thumb1_rtx_costs (x, code, outer_code);
5413 return true;
5416 switch (code)
5418 case MULT:
5419 /* There is no point basing this on the tuning, since it is always the
5420 fast variant if it exists at all. */
5421 if (mode == DImode
5422 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5423 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5424 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5426 *total = 8;
5427 return true;
5431 if (GET_MODE_CLASS (mode) == MODE_FLOAT
5432 || mode == DImode)
5434 *total = 30;
5435 return true;
5438 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5440 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
5441 & (unsigned HOST_WIDE_INT) 0xffffffff);
5442 int cost, const_ok = const_ok_for_arm (i);
5443 unsigned HOST_WIDE_INT masked_const;
5445 /* The cost will be related to two insns.
5446 First a load of the constant (MOV or LDR), then a multiply. */
5447 cost = 2;
5448 if (! const_ok)
5449 cost += 1; /* LDR is probably more expensive because
5450 of longer result latency. */
5451 masked_const = i & 0xffff8000;
5452 if (masked_const != 0 && masked_const != 0xffff8000)
5454 masked_const = i & 0xf8000000;
5455 if (masked_const == 0 || masked_const == 0xf8000000)
5456 cost += 1;
5457 else
5458 cost += 2;
5460 *total = cost;
5461 return true;
5464 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
5465 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
5466 return true;
5468 case COMPARE:
5469 /* A COMPARE of a MULT is slow on XScale; the muls instruction
5470 will stall until the multiplication is complete. */
5471 if (GET_CODE (XEXP (x, 0)) == MULT)
5472 *total = 4 + rtx_cost (XEXP (x, 0), code);
5473 else
5474 *total = arm_rtx_costs_1 (x, code, outer_code);
5475 return true;
5477 default:
5478 *total = arm_rtx_costs_1 (x, code, outer_code);
5479 return true;
5484 /* RTX costs for 9e (and later) cores. */
5486 static bool
5487 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
5489 enum machine_mode mode = GET_MODE (x);
5490 int nonreg_cost;
5491 int cost;
5493 if (TARGET_THUMB1)
5495 switch (code)
5497 case MULT:
5498 *total = COSTS_N_INSNS (3);
5499 return true;
5501 default:
5502 *total = thumb1_rtx_costs (x, code, outer_code);
5503 return true;
5507 switch (code)
5509 case MULT:
5510 /* There is no point basing this on the tuning, since it is always the
5511 fast variant if it exists at all. */
5512 if (mode == DImode
5513 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
5514 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
5515 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
5517 *total = 3;
5518 return true;
5522 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5524 *total = 30;
5525 return true;
5527 if (mode == DImode)
5529 cost = 7;
5530 nonreg_cost = 8;
5532 else
5534 cost = 2;
5535 nonreg_cost = 4;
5539 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
5540 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
5541 return true;
5543 default:
5544 *total = arm_rtx_costs_1 (x, code, outer_code);
5545 return true;
5548 /* All address computations that can be done are free, but rtx cost returns
5549 the same for practically all of them. So we weight the different types
5550 of address here in the order (most pref first):
5551 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
5552 static inline int
5553 arm_arm_address_cost (rtx x)
5555 enum rtx_code c = GET_CODE (x);
5557 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
5558 return 0;
5559 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
5560 return 10;
5562 if (c == PLUS || c == MINUS)
5564 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5565 return 2;
5567 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
5568 return 3;
5570 return 4;
5573 return 6;
5576 static inline int
5577 arm_thumb_address_cost (rtx x)
5579 enum rtx_code c = GET_CODE (x);
5581 if (c == REG)
5582 return 1;
5583 if (c == PLUS
5584 && GET_CODE (XEXP (x, 0)) == REG
5585 && GET_CODE (XEXP (x, 1)) == CONST_INT)
5586 return 1;
5588 return 2;
5591 static int
5592 arm_address_cost (rtx x)
5594 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
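/* Sample costs from the two helpers above: in ARM/Thumb-2 state,
   (pre_inc (reg r3)) costs 0, (plus (reg r3) (mult (reg r1) (const_int 4)))
   costs 3, (plus (reg r3) (const_int 8)) costs 4, a bare (reg r3) costs 6
   and a (symbol_ref) costs 10; in Thumb-1 state a register or reg+const
   address costs 1 and anything else costs 2.  */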
5597 static int
5598 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
5600 rtx i_pat, d_pat;
5602 /* Some true dependencies can have a higher cost depending
5603 on precisely how certain input operands are used. */
5604 if (arm_tune_xscale
5605 && REG_NOTE_KIND (link) == 0
5606 && recog_memoized (insn) >= 0
5607 && recog_memoized (dep) >= 0)
5609 int shift_opnum = get_attr_shift (insn);
5610 enum attr_type attr_type = get_attr_type (dep);
5612 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
5613 operand for INSN. If we have a shifted input operand and the
5614 instruction we depend on is another ALU instruction, then we may
5615 have to account for an additional stall. */
5616 if (shift_opnum != 0
5617 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
5619 rtx shifted_operand;
5620 int opno;
5622 /* Get the shifted operand. */
5623 extract_insn (insn);
5624 shifted_operand = recog_data.operand[shift_opnum];
5626 /* Iterate over all the operands in DEP. If we write an operand
5627 that overlaps with SHIFTED_OPERAND, then we have to increase the
5628 cost of this dependency. */
5629 extract_insn (dep);
5630 preprocess_constraints ();
5631 for (opno = 0; opno < recog_data.n_operands; opno++)
5633 /* We can ignore strict inputs. */
5634 if (recog_data.operand_type[opno] == OP_IN)
5635 continue;
5637 if (reg_overlap_mentioned_p (recog_data.operand[opno],
5638 shifted_operand))
5639 return 2;
5644 /* XXX This is not strictly true for the FPA. */
5645 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
5646 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
5647 return 0;
5649 /* Call insns don't incur a stall, even if they follow a load. */
5650 if (REG_NOTE_KIND (link) == 0
5651 && GET_CODE (insn) == CALL_INSN)
5652 return 1;
5654 if ((i_pat = single_set (insn)) != NULL
5655 && GET_CODE (SET_SRC (i_pat)) == MEM
5656 && (d_pat = single_set (dep)) != NULL
5657 && GET_CODE (SET_DEST (d_pat)) == MEM)
5659 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
5660 /* This is a load after a store; there is no conflict if the load reads
5661 from a cached area. Assume that loads from the stack, and from the
5662 constant pool are cached, and that others will miss. This is a
5663 hack. */
5665 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
5666 || reg_mentioned_p (stack_pointer_rtx, src_mem)
5667 || reg_mentioned_p (frame_pointer_rtx, src_mem)
5668 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
5669 return 1;
5672 return cost;
5675 static int fp_consts_inited = 0;
5677 /* Only zero is valid for VFP. Other values are also valid for FPA. */
5678 static const char * const strings_fp[8] =
5680 "0", "1", "2", "3",
5681 "4", "5", "0.5", "10"
5684 static REAL_VALUE_TYPE values_fp[8];
5686 static void
5687 init_fp_table (void)
5689 int i;
5690 REAL_VALUE_TYPE r;
5692 if (TARGET_VFP)
5693 fp_consts_inited = 1;
5694 else
5695 fp_consts_inited = 8;
5697 for (i = 0; i < fp_consts_inited; i++)
5699 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
5700 values_fp[i] = r;
5704 /* Return TRUE if rtx X is a valid immediate FP constant. */
5706 arm_const_double_rtx (rtx x)
5708 REAL_VALUE_TYPE r;
5709 int i;
5711 if (!fp_consts_inited)
5712 init_fp_table ();
5714 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5715 if (REAL_VALUE_MINUS_ZERO (r))
5716 return 0;
5718 for (i = 0; i < fp_consts_inited; i++)
5719 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5720 return 1;
5722 return 0;
5725 /* Return TRUE if rtx X is a valid immediate FPA constant. */
5727 neg_const_double_rtx_ok_for_fpa (rtx x)
5729 REAL_VALUE_TYPE r;
5730 int i;
5732 if (!fp_consts_inited)
5733 init_fp_table ();
5735 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5736 r = REAL_VALUE_NEGATE (r);
5737 if (REAL_VALUE_MINUS_ZERO (r))
5738 return 0;
5740 for (i = 0; i < 8; i++)
5741 if (REAL_VALUES_EQUAL (r, values_fp[i]))
5742 return 1;
5744 return 0;
5748 /* VFPv3 has a fairly wide range of representable immediates, formed from
5749 "quarter-precision" floating-point values. These can be evaluated using this
5750 formula (with ^ for exponentiation):
5752 -1^s * n * 2^-r
5754 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
5755 16 <= n <= 31 and 0 <= r <= 7.
5757 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
5759 - A (most-significant) is the sign bit.
5760 - BCD are the exponent (encoded as r XOR 3).
5761 - EFGH are the mantissa (encoded as n - 16). */
5764 /* Return an integer index for a VFPv3 immediate operand X suitable for the
5765 fconst[sd] instruction, or -1 if X isn't suitable. */
5766 static int
5767 vfp3_const_double_index (rtx x)
5769 REAL_VALUE_TYPE r, m;
5770 int sign, exponent;
5771 unsigned HOST_WIDE_INT mantissa, mant_hi;
5772 unsigned HOST_WIDE_INT mask;
5773 HOST_WIDE_INT m1, m2;
5774 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
5776 if (!TARGET_VFP3 || GET_CODE (x) != CONST_DOUBLE)
5777 return -1;
5779 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5781 /* We can't represent these things, so detect them first. */
5782 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
5783 return -1;
5785 /* Extract sign, exponent and mantissa. */
5786 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
5787 r = REAL_VALUE_ABS (r);
5788 exponent = REAL_EXP (&r);
5789 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
5790 highest (sign) bit, with a fixed binary point at bit point_pos.
5791 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
5792 bits for the mantissa, this may fail (low bits would be lost). */
5793 real_ldexp (&m, &r, point_pos - exponent);
5794 REAL_VALUE_TO_INT (&m1, &m2, m);
5795 mantissa = m1;
5796 mant_hi = m2;
5798 /* If there are bits set in the low part of the mantissa, we can't
5799 represent this value. */
5800 if (mantissa != 0)
5801 return -1;
5803 /* Now make it so that mantissa contains the most-significant bits, and move
5804 the point_pos to indicate that the least-significant bits have been
5805 discarded. */
5806 point_pos -= HOST_BITS_PER_WIDE_INT;
5807 mantissa = mant_hi;
5809 /* We can permit four significant bits of mantissa only, plus a high bit
5810 which is always 1. */
5811 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
5812 if ((mantissa & mask) != 0)
5813 return -1;
5815 /* Now we know the mantissa is in range, chop off the unneeded bits. */
5816 mantissa >>= point_pos - 5;
5818 /* The mantissa may be zero. Disallow that case. (It's possible to load the
5819 floating-point immediate zero with Neon using an integer-zero load, but
5820 that case is handled elsewhere.) */
5821 if (mantissa == 0)
5822 return -1;
5824 gcc_assert (mantissa >= 16 && mantissa <= 31);
5826 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
5827 normalized significands are in the range [1, 2). (Our mantissa is shifted
5828 left 4 places at this point relative to normalized IEEE754 values). GCC
5829 internally uses [0.5, 1) (see real.c), so the exponent returned from
5830 REAL_EXP must be altered. */
5831 exponent = 5 - exponent;
5833 if (exponent < 0 || exponent > 7)
5834 return -1;
5836 /* Sign, mantissa and exponent are now in the correct form to plug into the
5837 formula described in the comment above. */
5838 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
5841 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
5843 vfp3_const_double_rtx (rtx x)
5845 if (!TARGET_VFP3)
5846 return 0;
5848 return vfp3_const_double_index (x) != -1;
5851 /* Recognize immediates which can be used in various Neon instructions. Legal
5852 immediates are described by the following table (for VMVN variants, the
5853 bitwise inverse of the constant shown is recognized. In either case, VMOV
5854 is output and the correct instruction to use for a given constant is chosen
5855 by the assembler). The constant shown is replicated across all elements of
5856 the destination vector.
5858 insn elems variant constant (binary)
5859 ---- ----- ------- -----------------
5860 vmov i32 0 00000000 00000000 00000000 abcdefgh
5861 vmov i32 1 00000000 00000000 abcdefgh 00000000
5862 vmov i32 2 00000000 abcdefgh 00000000 00000000
5863 vmov i32 3 abcdefgh 00000000 00000000 00000000
5864 vmov i16 4 00000000 abcdefgh
5865 vmov i16 5 abcdefgh 00000000
5866 vmvn i32 6 00000000 00000000 00000000 abcdefgh
5867 vmvn i32 7 00000000 00000000 abcdefgh 00000000
5868 vmvn i32 8 00000000 abcdefgh 00000000 00000000
5869 vmvn i32 9 abcdefgh 00000000 00000000 00000000
5870 vmvn i16 10 00000000 abcdefgh
5871 vmvn i16 11 abcdefgh 00000000
5872 vmov i32 12 00000000 00000000 abcdefgh 11111111
5873 vmvn i32 13 00000000 00000000 abcdefgh 11111111
5874 vmov i32 14 00000000 abcdefgh 11111111 11111111
5875 vmvn i32 15 00000000 abcdefgh 11111111 11111111
5876 vmov i8 16 abcdefgh
5877 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
5878 eeeeeeee ffffffff gggggggg hhhhhhhh
5879 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
5881 For case 18, B = !b. Representable values are exactly those accepted by
5882 vfp3_const_double_index, but are output as floating-point numbers rather
5883 than indices.
5885 Variants 0-5 (inclusive) may also be used as immediates for the second
5886 operand of VORR/VBIC instructions.
5888 The INVERSE argument causes the bitwise inverse of the given operand to be
5889 recognized instead (used for recognizing legal immediates for the VAND/VORN
5890 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
5891 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
5892 output, rather than the real insns vbic/vorr).
5894 INVERSE makes no difference to the recognition of float vectors.
5896 The return value is the variant of immediate as shown in the above table, or
5897 -1 if the given value doesn't match any of the listed patterns.
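/* As a worked example derived from the table above (not an exhaustive
   specification): a V4SImode constant whose every element is 0x0000ab00
   splats to the byte pattern 00 ab 00 00 repeated four times, which matches
   variant 1 with abcdefgh = 0xab, so the function would return 1, setting
   *ELEMENTWIDTH to 32 and *MODCONST to 0x0000ab00.  */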
5899 static int
5900 neon_valid_immediate (rtx op, enum machine_mode mode, int inverse,
5901 rtx *modconst, int *elementwidth)
5903 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
5904 matches = 1; \
5905 for (i = 0; i < idx; i += (STRIDE)) \
5906 if (!(TEST)) \
5907 matches = 0; \
5908 if (matches) \
5910 immtype = (CLASS); \
5911 elsize = (ELSIZE); \
5912 break; \
5915 unsigned int i, elsize, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
5916 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
5917 unsigned char bytes[16];
5918 int immtype = -1, matches;
5919 unsigned int invmask = inverse ? 0xff : 0;
5921 /* Vectors of float constants. */
5922 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5924 rtx el0 = CONST_VECTOR_ELT (op, 0);
5925 REAL_VALUE_TYPE r0;
5927 if (!vfp3_const_double_rtx (el0))
5928 return -1;
5930 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
5932 for (i = 1; i < n_elts; i++)
5934 rtx elt = CONST_VECTOR_ELT (op, i);
5935 REAL_VALUE_TYPE re;
5937 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
5939 if (!REAL_VALUES_EQUAL (r0, re))
5940 return -1;
5943 if (modconst)
5944 *modconst = CONST_VECTOR_ELT (op, 0);
5946 if (elementwidth)
5947 *elementwidth = 0;
5949 return 18;
5952 /* Splat vector constant out into a byte vector. */
5953 for (i = 0; i < n_elts; i++)
5955 rtx el = CONST_VECTOR_ELT (op, i);
5956 unsigned HOST_WIDE_INT elpart;
5957 unsigned int part, parts;
5959 if (GET_CODE (el) == CONST_INT)
5961 elpart = INTVAL (el);
5962 parts = 1;
5964 else if (GET_CODE (el) == CONST_DOUBLE)
5966 elpart = CONST_DOUBLE_LOW (el);
5967 parts = 2;
5969 else
5970 gcc_unreachable ();
5972 for (part = 0; part < parts; part++)
5974 unsigned int byte;
5975 for (byte = 0; byte < innersize; byte++)
5977 bytes[idx++] = (elpart & 0xff) ^ invmask;
5978 elpart >>= BITS_PER_UNIT;
5980 if (GET_CODE (el) == CONST_DOUBLE)
5981 elpart = CONST_DOUBLE_HIGH (el);
5985 /* Sanity check. */
5986 gcc_assert (idx == GET_MODE_SIZE (mode));
5988 do
5990 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
5991 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
5993 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
5994 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
5996 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
5997 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
5999 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6000 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
6002 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
6004 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
6006 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6007 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6009 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6010 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6012 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6013 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6015 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6016 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
6018 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
6020 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
6022 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6023 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
6025 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6026 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
6028 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6029 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
6031 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
6034 CHECK (1, 8, 16, bytes[i] == bytes[0]);
6036 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6037 && bytes[i] == bytes[(i + 8) % idx]);
6039 while (0);
6041 if (immtype == -1)
6042 return -1;
6044 if (elementwidth)
6045 *elementwidth = elsize;
6047 if (modconst)
6049 unsigned HOST_WIDE_INT imm = 0;
6051 /* Un-invert bytes of recognized vector, if necessary. */
6052 if (invmask != 0)
6053 for (i = 0; i < idx; i++)
6054 bytes[i] ^= invmask;
6056 if (immtype == 17)
6058 /* FIXME: Broken on 32-bit H_W_I hosts. */
6059 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6061 for (i = 0; i < 8; i++)
6062 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6063 << (i * BITS_PER_UNIT);
6065 *modconst = GEN_INT (imm);
6067 else
6069 unsigned HOST_WIDE_INT imm = 0;
6071 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6072 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6074 *modconst = GEN_INT (imm);
6078 return immtype;
6079 #undef CHECK
6082 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
6083 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
6084 float elements), and a modified constant (whatever should be output for a
6085 VMOV) in *MODCONST. */
6088 neon_immediate_valid_for_move (rtx op, enum machine_mode mode,
6089 rtx *modconst, int *elementwidth)
6091 rtx tmpconst;
6092 int tmpwidth;
6093 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
6095 if (retval == -1)
6096 return 0;
6098 if (modconst)
6099 *modconst = tmpconst;
6101 if (elementwidth)
6102 *elementwidth = tmpwidth;
6104 return 1;
6107 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
6108 the immediate is valid, write a constant suitable for using as an operand
6109 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
6110 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
6113 neon_immediate_valid_for_logic (rtx op, enum machine_mode mode, int inverse,
6114 rtx *modconst, int *elementwidth)
6116 rtx tmpconst;
6117 int tmpwidth;
6118 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
6120 if (retval < 0 || retval > 5)
6121 return 0;
6123 if (modconst)
6124 *modconst = tmpconst;
6126 if (elementwidth)
6127 *elementwidth = tmpwidth;
6129 return 1;
6132 /* Return a string suitable for output of Neon immediate logic operation
6133 MNEM. */
6135 char *
6136 neon_output_logic_immediate (const char *mnem, rtx *op2, enum machine_mode mode,
6137 int inverse, int quad)
6139 int width, is_valid;
6140 static char templ[40];
6142 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
6144 gcc_assert (is_valid != 0);
6146 if (quad)
6147 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
6148 else
6149 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
6151 return templ;
6154 /* Output a sequence of pairwise operations to implement a reduction.
6155 NOTE: We do "too much work" here, because pairwise operations work on two
6156 registers-worth of operands in one go. Unfortunately we don't think we can
6157 exploit those extra calculations to do the full operation in fewer steps.
6158 Although all vector elements of the result but the first are ignored, we
6159 actually calculate the same result in each of the elements. An alternative
6160 such as initially loading a vector with zero to use as each of the second
6161 operands would use up an additional register and take an extra instruction,
6162 for no particular gain. */
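/* As a sketch of what this produces (assuming an add reduction): for a
   V2SImode value {a, b} a single step computes {a+b, a+b}; for V4SImode two
   steps are emitted, leaving the full sum replicated in every element even
   though only element 0 is used afterwards.  */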
6164 void
6165 neon_pairwise_reduce (rtx op0, rtx op1, enum machine_mode mode,
6166 rtx (*reduc) (rtx, rtx, rtx))
6168 enum machine_mode inner = GET_MODE_INNER (mode);
6169 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
6170 rtx tmpsum = op1;
6172 for (i = parts / 2; i >= 1; i /= 2)
6174 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
6175 emit_insn (reduc (dest, tmpsum, tmpsum));
6176 tmpsum = dest;
6180 /* Initialize a vector with non-constant elements. FIXME: We can do better
6181 than the current implementation (building a vector on the stack and then
6182 loading it) in many cases. See rs6000.c. */
6184 void
6185 neon_expand_vector_init (rtx target, rtx vals)
6187 enum machine_mode mode = GET_MODE (target);
6188 enum machine_mode inner = GET_MODE_INNER (mode);
6189 unsigned int i, n_elts = GET_MODE_NUNITS (mode);
6190 rtx mem;
6192 gcc_assert (VECTOR_MODE_P (mode));
6194 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
6195 for (i = 0; i < n_elts; i++)
6196 emit_move_insn (adjust_address_nv (mem, inner, i * GET_MODE_SIZE (inner)),
6197 XVECEXP (vals, 0, i));
6199 emit_move_insn (target, mem);
6202 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
6203 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
6204 reported source locations are bogus. */
6206 static void
6207 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
6208 const char *err)
6210 HOST_WIDE_INT lane;
6212 gcc_assert (GET_CODE (operand) == CONST_INT);
6214 lane = INTVAL (operand);
6216 if (lane < low || lane >= high)
6217 error (err);
6220 /* Bounds-check lanes. */
6222 void
6223 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6225 bounds_check (operand, low, high, "lane out of range");
6228 /* Bounds-check constants. */
6230 void
6231 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6233 bounds_check (operand, low, high, "constant out of range");
6236 HOST_WIDE_INT
6237 neon_element_bits (enum machine_mode mode)
6239 if (mode == DImode)
6240 return GET_MODE_BITSIZE (mode);
6241 else
6242 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
6246 /* Predicates for `match_operand' and `match_operator'. */
6248 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
6250 cirrus_memory_offset (rtx op)
6252 /* Reject eliminable registers. */
6253 if (! (reload_in_progress || reload_completed)
6254 && ( reg_mentioned_p (frame_pointer_rtx, op)
6255 || reg_mentioned_p (arg_pointer_rtx, op)
6256 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6257 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6258 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6259 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6260 return 0;
6262 if (GET_CODE (op) == MEM)
6264 rtx ind;
6266 ind = XEXP (op, 0);
6268 /* Match: (mem (reg)). */
6269 if (GET_CODE (ind) == REG)
6270 return 1;
6272 /* Match:
6273 (mem (plus (reg)
6274 (const))). */
6275 if (GET_CODE (ind) == PLUS
6276 && GET_CODE (XEXP (ind, 0)) == REG
6277 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6278 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
6279 return 1;
6282 return 0;
6285 /* Return TRUE if OP is a valid coprocessor memory address pattern.
6286 WB is true if full writeback address modes are allowed and is false
6287 if limited writeback address modes (POST_INC and PRE_DEC) are
6288 allowed. */
6291 arm_coproc_mem_operand (rtx op, bool wb)
6293 rtx ind;
6295 /* Reject eliminable registers. */
6296 if (! (reload_in_progress || reload_completed)
6297 && ( reg_mentioned_p (frame_pointer_rtx, op)
6298 || reg_mentioned_p (arg_pointer_rtx, op)
6299 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6300 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6301 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6302 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6303 return FALSE;
6305 /* Constants are converted into offsets from labels. */
6306 if (GET_CODE (op) != MEM)
6307 return FALSE;
6309 ind = XEXP (op, 0);
6311 if (reload_completed
6312 && (GET_CODE (ind) == LABEL_REF
6313 || (GET_CODE (ind) == CONST
6314 && GET_CODE (XEXP (ind, 0)) == PLUS
6315 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6316 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6317 return TRUE;
6319 /* Match: (mem (reg)). */
6320 if (GET_CODE (ind) == REG)
6321 return arm_address_register_rtx_p (ind, 0);
6323 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
6324 acceptable in any case (subject to verification by
6325 arm_address_register_rtx_p). We need WB to be true to accept
6326 PRE_INC and POST_DEC. */
6327 if (GET_CODE (ind) == POST_INC
6328 || GET_CODE (ind) == PRE_DEC
6329 || (wb
6330 && (GET_CODE (ind) == PRE_INC
6331 || GET_CODE (ind) == POST_DEC)))
6332 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6334 if (wb
6335 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
6336 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6337 && GET_CODE (XEXP (ind, 1)) == PLUS
6338 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6339 ind = XEXP (ind, 1);
6341 /* Match:
6342 (plus (reg)
6343 (const)). */
6344 if (GET_CODE (ind) == PLUS
6345 && GET_CODE (XEXP (ind, 0)) == REG
6346 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6347 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6348 && INTVAL (XEXP (ind, 1)) > -1024
6349 && INTVAL (XEXP (ind, 1)) < 1024
6350 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6351 return TRUE;
6353 return FALSE;
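/* In assembler terms the forms accepted above correspond to addresses such
   as [r3], [r3, #-1020] ... [r3, #1020] in multiples of 4, plus the
   auto-modify forms permitted by WB; an offset of 1024 or an unaligned
   offset such as #2 is rejected.  */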
6356 /* Return TRUE if OP is a memory operand which we can load or store a vector
6357 to/from. If CORE is true, we're moving from ARM registers not Neon
6358 registers. */
6360 neon_vector_mem_operand (rtx op, bool core)
6362 rtx ind;
6364 /* Reject eliminable registers. */
6365 if (! (reload_in_progress || reload_completed)
6366 && ( reg_mentioned_p (frame_pointer_rtx, op)
6367 || reg_mentioned_p (arg_pointer_rtx, op)
6368 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6369 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6370 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6371 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6372 return FALSE;
6374 /* Constants are converted into offsets from labels. */
6375 if (GET_CODE (op) != MEM)
6376 return FALSE;
6378 ind = XEXP (op, 0);
6380 if (reload_completed
6381 && (GET_CODE (ind) == LABEL_REF
6382 || (GET_CODE (ind) == CONST
6383 && GET_CODE (XEXP (ind, 0)) == PLUS
6384 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6385 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6386 return TRUE;
6388 /* Match: (mem (reg)). */
6389 if (GET_CODE (ind) == REG)
6390 return arm_address_register_rtx_p (ind, 0);
6392 /* Allow post-increment with Neon registers. */
6393 if (!core && GET_CODE (ind) == POST_INC)
6394 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
6396 #if 0
6397 /* FIXME: We can support this too if we use VLD1/VST1. */
6398 if (!core
6399 && GET_CODE (ind) == POST_MODIFY
6400 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
6401 && GET_CODE (XEXP (ind, 1)) == PLUS
6402 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
6403 ind = XEXP (ind, 1);
6404 #endif
6406 /* Match:
6407 (plus (reg)
6408 (const)). */
6409 if (!core
6410 && GET_CODE (ind) == PLUS
6411 && GET_CODE (XEXP (ind, 0)) == REG
6412 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
6413 && GET_CODE (XEXP (ind, 1)) == CONST_INT
6414 && INTVAL (XEXP (ind, 1)) > -1024
6415 && INTVAL (XEXP (ind, 1)) < 1016
6416 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
6417 return TRUE;
6419 return FALSE;
6422 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
6423 type. */
6425 neon_struct_mem_operand (rtx op)
6427 rtx ind;
6429 /* Reject eliminable registers. */
6430 if (! (reload_in_progress || reload_completed)
6431 && ( reg_mentioned_p (frame_pointer_rtx, op)
6432 || reg_mentioned_p (arg_pointer_rtx, op)
6433 || reg_mentioned_p (virtual_incoming_args_rtx, op)
6434 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
6435 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
6436 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
6437 return FALSE;
6439 /* Constants are converted into offsets from labels. */
6440 if (GET_CODE (op) != MEM)
6441 return FALSE;
6443 ind = XEXP (op, 0);
6445 if (reload_completed
6446 && (GET_CODE (ind) == LABEL_REF
6447 || (GET_CODE (ind) == CONST
6448 && GET_CODE (XEXP (ind, 0)) == PLUS
6449 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
6450 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
6451 return TRUE;
6453 /* Match: (mem (reg)). */
6454 if (GET_CODE (ind) == REG)
6455 return arm_address_register_rtx_p (ind, 0);
6457 return FALSE;
6460 /* Return true if X is a register that will be eliminated later on. */
6462 arm_eliminable_register (rtx x)
6464 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
6465 || REGNO (x) == ARG_POINTER_REGNUM
6466 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
6467 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
6470 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
6471 coprocessor registers. Otherwise return NO_REGS. */
6473 enum reg_class
6474 coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
6476 if (TARGET_NEON
6477 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6478 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6479 && neon_vector_mem_operand (x, FALSE))
6480 return NO_REGS;
6482 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
6483 return NO_REGS;
6485 return GENERAL_REGS;
6488 /* Values which must be returned in the most-significant end of the return
6489 register. */
6491 static bool
6492 arm_return_in_msb (const_tree valtype)
6494 return (TARGET_AAPCS_BASED
6495 && BYTES_BIG_ENDIAN
6496 && (AGGREGATE_TYPE_P (valtype)
6497 || TREE_CODE (valtype) == COMPLEX_TYPE));
6500 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
6501 Used by the Cirrus Maverick code which has to work around
6502 a hardware bug triggered by such instructions. */
6503 static bool
6504 arm_memory_load_p (rtx insn)
6506 rtx body, lhs, rhs;
6508 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
6509 return false;
6511 body = PATTERN (insn);
6513 if (GET_CODE (body) != SET)
6514 return false;
6516 lhs = XEXP (body, 0);
6517 rhs = XEXP (body, 1);
6519 lhs = REG_OR_SUBREG_RTX (lhs);
6521 /* If the destination is not a general purpose
6522 register we do not have to worry. */
6523 if (GET_CODE (lhs) != REG
6524 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
6525 return false;
6527 /* As well as loads from memory we also have to react
6528 to loads of invalid constants which will be turned
6529 into loads from the minipool. */
6530 return (GET_CODE (rhs) == MEM
6531 || GET_CODE (rhs) == SYMBOL_REF
6532 || note_invalid_constants (insn, -1, false));
6535 /* Return TRUE if INSN is a Cirrus instruction. */
6536 static bool
6537 arm_cirrus_insn_p (rtx insn)
6539 enum attr_cirrus attr;
6541 /* get_attr cannot accept USE or CLOBBER. */
6542 if (!insn
6543 || GET_CODE (insn) != INSN
6544 || GET_CODE (PATTERN (insn)) == USE
6545 || GET_CODE (PATTERN (insn)) == CLOBBER)
6546 return 0;
6548 attr = get_attr_cirrus (insn);
6550 return attr != CIRRUS_NOT;
6553 /* Cirrus reorg for invalid instruction combinations. */
6554 static void
6555 cirrus_reorg (rtx first)
6557 enum attr_cirrus attr;
6558 rtx body = PATTERN (first);
6559 rtx t;
6560 int nops;
6562 /* Any branch must be followed by 2 non Cirrus instructions. */
6563 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
6565 nops = 0;
6566 t = next_nonnote_insn (first);
6568 if (arm_cirrus_insn_p (t))
6569 ++ nops;
6571 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6572 ++ nops;
6574 while (nops --)
6575 emit_insn_after (gen_nop (), first);
6577 return;
6580 /* (float (blah)) is in parallel with a clobber. */
6581 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
6582 body = XVECEXP (body, 0, 0);
6584 if (GET_CODE (body) == SET)
6586 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
6588 /* cfldrd, cfldr64, cfstrd, cfstr64 must
6589 be followed by a non Cirrus insn. */
6590 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
6592 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
6593 emit_insn_after (gen_nop (), first);
6595 return;
6597 else if (arm_memory_load_p (first))
6599 unsigned int arm_regno;
6601 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
6602 ldr/cfmv64hr combination where the Rd field is the same
6603 in both instructions must be split with a non Cirrus
6604 insn. Example:
6606 ldr r0, blah
6608 cfmvsr mvf0, r0. */
6610 /* Get Arm register number for ldr insn. */
6611 if (GET_CODE (lhs) == REG)
6612 arm_regno = REGNO (lhs);
6613 else
6615 gcc_assert (GET_CODE (rhs) == REG);
6616 arm_regno = REGNO (rhs);
6619 /* Next insn. */
6620 first = next_nonnote_insn (first);
6622 if (! arm_cirrus_insn_p (first))
6623 return;
6625 body = PATTERN (first);
6627 /* (float (blah)) is in parallel with a clobber. */
6628 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
6629 body = XVECEXP (body, 0, 0);
6631 if (GET_CODE (body) == FLOAT)
6632 body = XEXP (body, 0);
6634 if (get_attr_cirrus (first) == CIRRUS_MOVE
6635 && GET_CODE (XEXP (body, 1)) == REG
6636 && arm_regno == REGNO (XEXP (body, 1)))
6637 emit_insn_after (gen_nop (), first);
6639 return;
6643 /* get_attr cannot accept USE or CLOBBER. */
6644 if (!first
6645 || GET_CODE (first) != INSN
6646 || GET_CODE (PATTERN (first)) == USE
6647 || GET_CODE (PATTERN (first)) == CLOBBER)
6648 return;
6650 attr = get_attr_cirrus (first);
6652 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
6653 must be followed by a non-coprocessor instruction. */
6654 if (attr == CIRRUS_COMPARE)
6656 nops = 0;
6658 t = next_nonnote_insn (first);
6660 if (arm_cirrus_insn_p (t))
6661 ++ nops;
6663 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
6664 ++ nops;
6666 while (nops --)
6667 emit_insn_after (gen_nop (), first);
6669 return;
6673 /* Return TRUE if X references a SYMBOL_REF. */
6675 symbol_mentioned_p (rtx x)
6677 const char * fmt;
6678 int i;
6680 if (GET_CODE (x) == SYMBOL_REF)
6681 return 1;
6683 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
6684 are constant offsets, not symbols. */
6685 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6686 return 0;
6688 fmt = GET_RTX_FORMAT (GET_CODE (x));
6690 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6692 if (fmt[i] == 'E')
6694 int j;
6696 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6697 if (symbol_mentioned_p (XVECEXP (x, i, j)))
6698 return 1;
6700 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
6701 return 1;
6704 return 0;
6707 /* Return TRUE if X references a LABEL_REF. */
6709 label_mentioned_p (rtx x)
6711 const char * fmt;
6712 int i;
6714 if (GET_CODE (x) == LABEL_REF)
6715 return 1;
6717 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
6718 instruction, but they are constant offsets, not symbols. */
6719 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
6720 return 0;
6722 fmt = GET_RTX_FORMAT (GET_CODE (x));
6723 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6725 if (fmt[i] == 'E')
6727 int j;
6729 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
6730 if (label_mentioned_p (XVECEXP (x, i, j)))
6731 return 1;
6733 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
6734 return 1;
6737 return 0;
6741 tls_mentioned_p (rtx x)
6743 switch (GET_CODE (x))
6745 case CONST:
6746 return tls_mentioned_p (XEXP (x, 0));
6748 case UNSPEC:
6749 if (XINT (x, 1) == UNSPEC_TLS)
6750 return 1;
6752 default:
6753 return 0;
6757 /* Must not copy a SET whose source operand is PC-relative. */
6759 static bool
6760 arm_cannot_copy_insn_p (rtx insn)
6762 rtx pat = PATTERN (insn);
6764 if (GET_CODE (pat) == SET)
6766 rtx rhs = SET_SRC (pat);
6768 if (GET_CODE (rhs) == UNSPEC
6769 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
6770 return TRUE;
6772 if (GET_CODE (rhs) == MEM
6773 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
6774 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
6775 return TRUE;
6778 return FALSE;
6781 enum rtx_code
6782 minmax_code (rtx x)
6784 enum rtx_code code = GET_CODE (x);
6786 switch (code)
6788 case SMAX:
6789 return GE;
6790 case SMIN:
6791 return LE;
6792 case UMIN:
6793 return LEU;
6794 case UMAX:
6795 return GEU;
6796 default:
6797 gcc_unreachable ();
6801 /* Return 1 if memory locations are adjacent. */
6803 adjacent_mem_locations (rtx a, rtx b)
6805 /* We don't guarantee to preserve the order of these memory refs. */
6806 if (volatile_refs_p (a) || volatile_refs_p (b))
6807 return 0;
6809 if ((GET_CODE (XEXP (a, 0)) == REG
6810 || (GET_CODE (XEXP (a, 0)) == PLUS
6811 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
6812 && (GET_CODE (XEXP (b, 0)) == REG
6813 || (GET_CODE (XEXP (b, 0)) == PLUS
6814 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
6816 HOST_WIDE_INT val0 = 0, val1 = 0;
6817 rtx reg0, reg1;
6818 int val_diff;
6820 if (GET_CODE (XEXP (a, 0)) == PLUS)
6822 reg0 = XEXP (XEXP (a, 0), 0);
6823 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
6825 else
6826 reg0 = XEXP (a, 0);
6828 if (GET_CODE (XEXP (b, 0)) == PLUS)
6830 reg1 = XEXP (XEXP (b, 0), 0);
6831 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
6833 else
6834 reg1 = XEXP (b, 0);
6836 /* Don't accept any offset that will require multiple
6837 instructions to handle, since this would cause the
6838 arith_adjacentmem pattern to output an overlong sequence. */
6839 if (!const_ok_for_op (PLUS, val0) || !const_ok_for_op (PLUS, val1))
6840 return 0;
6842 /* Don't allow an eliminable register: register elimination can make
6843 the offset too large. */
6844 if (arm_eliminable_register (reg0))
6845 return 0;
6847 val_diff = val1 - val0;
6849 if (arm_ld_sched)
6851 /* If the target has load delay slots, then there's no benefit
6852 to using an ldm instruction unless the offset is zero and
6853 we are optimizing for size. */
6854 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
6855 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
6856 && (val_diff == 4 || val_diff == -4));
6859 return ((REGNO (reg0) == REGNO (reg1))
6860 && (val_diff == 4 || val_diff == -4));
6863 return 0;
6867 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
6868 HOST_WIDE_INT *load_offset)
6870 int unsorted_regs[4];
6871 HOST_WIDE_INT unsorted_offsets[4];
6872 int order[4];
6873 int base_reg = -1;
6874 int i;
6876 /* Can only handle 2, 3, or 4 insns at present,
6877 though could be easily extended if required. */
6878 gcc_assert (nops >= 2 && nops <= 4);
6880 /* Loop over the operands and check that the memory references are
6881 suitable (i.e. immediate offsets from the same base register). At
6882 the same time, extract the target register, and the memory
6883 offsets. */
6884 for (i = 0; i < nops; i++)
6886 rtx reg;
6887 rtx offset;
6889 /* Convert a subreg of a mem into the mem itself. */
6890 if (GET_CODE (operands[nops + i]) == SUBREG)
6891 operands[nops + i] = alter_subreg (operands + (nops + i));
6893 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
6895 /* Don't reorder volatile memory references; it doesn't seem worth
6896 looking for the case where the order is ok anyway. */
6897 if (MEM_VOLATILE_P (operands[nops + i]))
6898 return 0;
6900 offset = const0_rtx;
6902 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
6903 || (GET_CODE (reg) == SUBREG
6904 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6905 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
6906 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
6907 == REG)
6908 || (GET_CODE (reg) == SUBREG
6909 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
6910 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
6911 == CONST_INT)))
6913 if (i == 0)
6915 base_reg = REGNO (reg);
6916 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
6917 ? REGNO (operands[i])
6918 : REGNO (SUBREG_REG (operands[i])));
6919 order[0] = 0;
6921 else
6923 if (base_reg != (int) REGNO (reg))
6924 /* Not addressed from the same base register. */
6925 return 0;
6927 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
6928 ? REGNO (operands[i])
6929 : REGNO (SUBREG_REG (operands[i])));
6930 if (unsorted_regs[i] < unsorted_regs[order[0]])
6931 order[0] = i;
6934 /* If it isn't an integer register, or if it overwrites the
6935 base register but isn't the last insn in the list, then
6936 we can't do this. */
6937 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
6938 || (i != nops - 1 && unsorted_regs[i] == base_reg))
6939 return 0;
6941 unsorted_offsets[i] = INTVAL (offset);
6943 else
6944 /* Not a suitable memory address. */
6945 return 0;
6948 /* All the useful information has now been extracted from the
6949 operands into unsorted_regs and unsorted_offsets; additionally,
6950 order[0] has been set to the lowest numbered register in the
6951 list. Sort the registers into order, and check that the memory
6952 offsets are ascending and adjacent. */
6954 for (i = 1; i < nops; i++)
6956 int j;
6958 order[i] = order[i - 1];
6959 for (j = 0; j < nops; j++)
6960 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
6961 && (order[i] == order[i - 1]
6962 || unsorted_regs[j] < unsorted_regs[order[i]]))
6963 order[i] = j;
6965 /* Have we found a suitable register? If not, one must be used more
6966 than once. */
6967 if (order[i] == order[i - 1])
6968 return 0;
6970 /* Is the memory address adjacent and ascending? */
6971 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
6972 return 0;
6975 if (base)
6977 *base = base_reg;
6979 for (i = 0; i < nops; i++)
6980 regs[i] = unsorted_regs[order[i]];
6982 *load_offset = unsorted_offsets[order[0]];
6985 if (unsorted_offsets[order[0]] == 0)
6986 return 1; /* ldmia */
6988 if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
6989 return 2; /* ldmib */
6991 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
6992 return 3; /* ldmda */
6994 if (unsorted_offsets[order[nops - 1]] == -4)
6995 return 4; /* ldmdb */
6997 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
6998 if the offset isn't small enough. The reason 2 ldrs are faster
6999 is because these ARMs are able to do more than one cache access
7000 in a single cycle. The ARM9 and StrongARM have Harvard caches,
7001 whilst the ARM8 has a double bandwidth cache. This means that
7002 these cores can do both an instruction fetch and a data fetch in
7003 a single cycle, so the trick of calculating the address into a
7004 scratch register (one of the result regs) and then doing a load
7005 multiple actually becomes slower (and no smaller in code size).
7006 That is the transformation
7008 ldr rd1, [rbase + offset]
7009 ldr rd2, [rbase + offset + 4]
7011 to
7013 add rd1, rbase, offset
7014 ldmia rd1, {rd1, rd2}
7016 produces worse code -- '3 cycles + any stalls on rd2' instead of
7017 '2 cycles + any stalls on rd2'. On ARMs with only one cache
7018 access per cycle, the first sequence could never complete in less
7019 than 6 cycles, whereas the ldm sequence would only take 5 and
7020 would make better use of sequential accesses if not hitting the
7021 cache.
7023 We cheat here and test 'arm_ld_sched' which we currently know to
7024 only be true for the ARM8, ARM9 and StrongARM. If this ever
7025 changes, then the test below needs to be reworked. */
7026 if (nops == 2 && arm_ld_sched)
7027 return 0;
7029 /* Can't do it without setting up the offset; only do this if it takes
7030 no more than one insn. */
7031 return (const_ok_for_arm (unsorted_offsets[order[0]])
7032 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
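/* For instance, given the two loads r0 <- [r4] and r1 <- [r4, #4], the code
   above fills REGS with {0, 1}, sets *BASE to 4 and *LOAD_OFFSET to 0, and
   returns 1 (ldmia); with offsets of 4 and 8 it would return 2 (ldmib) when
   TARGET_ARM.  */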
7035 const char *
7036 emit_ldm_seq (rtx *operands, int nops)
7038 int regs[4];
7039 int base_reg;
7040 HOST_WIDE_INT offset;
7041 char buf[100];
7042 int i;
7044 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7046 case 1:
7047 strcpy (buf, "ldm%(ia%)\t");
7048 break;
7050 case 2:
7051 strcpy (buf, "ldm%(ib%)\t");
7052 break;
7054 case 3:
7055 strcpy (buf, "ldm%(da%)\t");
7056 break;
7058 case 4:
7059 strcpy (buf, "ldm%(db%)\t");
7060 break;
7062 case 5:
7063 if (offset >= 0)
7064 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7065 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7066 (long) offset);
7067 else
7068 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
7069 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
7070 (long) -offset);
7071 output_asm_insn (buf, operands);
7072 base_reg = regs[0];
7073 strcpy (buf, "ldm%(ia%)\t");
7074 break;
7076 default:
7077 gcc_unreachable ();
7080 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7081 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7083 for (i = 1; i < nops; i++)
7084 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7085 reg_names[regs[i]]);
7087 strcat (buf, "}\t%@ phole ldm");
7089 output_asm_insn (buf, operands);
7090 return "";
7094 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
7095 HOST_WIDE_INT * load_offset)
7097 int unsorted_regs[4];
7098 HOST_WIDE_INT unsorted_offsets[4];
7099 int order[4];
7100 int base_reg = -1;
7101 int i;
7103 /* Can only handle 2, 3, or 4 insns at present, though could be easily
7104 extended if required. */
7105 gcc_assert (nops >= 2 && nops <= 4);
7107 /* Loop over the operands and check that the memory references are
7108 suitable (i.e. immediate offsets from the same base register). At
7109 the same time, extract the target register, and the memory
7110 offsets. */
7111 for (i = 0; i < nops; i++)
7113 rtx reg;
7114 rtx offset;
7116 /* Convert a subreg of a mem into the mem itself. */
7117 if (GET_CODE (operands[nops + i]) == SUBREG)
7118 operands[nops + i] = alter_subreg (operands + (nops + i));
7120 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
7122 /* Don't reorder volatile memory references; it doesn't seem worth
7123 looking for the case where the order is ok anyway. */
7124 if (MEM_VOLATILE_P (operands[nops + i]))
7125 return 0;
7127 offset = const0_rtx;
7129 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
7130 || (GET_CODE (reg) == SUBREG
7131 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7132 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
7133 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
7134 == REG)
7135 || (GET_CODE (reg) == SUBREG
7136 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
7137 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
7138 == CONST_INT)))
7140 if (i == 0)
7142 base_reg = REGNO (reg);
7143 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
7144 ? REGNO (operands[i])
7145 : REGNO (SUBREG_REG (operands[i])));
7146 order[0] = 0;
7148 else
7150 if (base_reg != (int) REGNO (reg))
7151 /* Not addressed from the same base register. */
7152 return 0;
7154 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
7155 ? REGNO (operands[i])
7156 : REGNO (SUBREG_REG (operands[i])));
7157 if (unsorted_regs[i] < unsorted_regs[order[0]])
7158 order[0] = i;
7161 /* If it isn't an integer register, then we can't do this. */
7162 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
7163 return 0;
7165 unsorted_offsets[i] = INTVAL (offset);
7167 else
7168 /* Not a suitable memory address. */
7169 return 0;
7172 /* All the useful information has now been extracted from the
7173 operands into unsorted_regs and unsorted_offsets; additionally,
7174 order[0] has been set to the lowest numbered register in the
7175 list. Sort the registers into order, and check that the memory
7176 offsets are ascending and adjacent. */
7178 for (i = 1; i < nops; i++)
7180 int j;
7182 order[i] = order[i - 1];
7183 for (j = 0; j < nops; j++)
7184 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
7185 && (order[i] == order[i - 1]
7186 || unsorted_regs[j] < unsorted_regs[order[i]]))
7187 order[i] = j;
7189 /* Have we found a suitable register? If not, one must be used more
7190 than once. */
7191 if (order[i] == order[i - 1])
7192 return 0;
7194 /* Is the memory address adjacent and ascending? */
7195 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
7196 return 0;
7199 if (base)
7201 *base = base_reg;
7203 for (i = 0; i < nops; i++)
7204 regs[i] = unsorted_regs[order[i]];
7206 *load_offset = unsorted_offsets[order[0]];
7209 if (unsorted_offsets[order[0]] == 0)
7210 return 1; /* stmia */
7212 if (unsorted_offsets[order[0]] == 4)
7213 return 2; /* stmib */
7215 if (unsorted_offsets[order[nops - 1]] == 0)
7216 return 3; /* stmda */
7218 if (unsorted_offsets[order[nops - 1]] == -4)
7219 return 4; /* stmdb */
7221 return 0;
7224 const char *
7225 emit_stm_seq (rtx *operands, int nops)
7227 int regs[4];
7228 int base_reg;
7229 HOST_WIDE_INT offset;
7230 char buf[100];
7231 int i;
7233 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
7235 case 1:
7236 strcpy (buf, "stm%(ia%)\t");
7237 break;
7239 case 2:
7240 strcpy (buf, "stm%(ib%)\t");
7241 break;
7243 case 3:
7244 strcpy (buf, "stm%(da%)\t");
7245 break;
7247 case 4:
7248 strcpy (buf, "stm%(db%)\t");
7249 break;
7251 default:
7252 gcc_unreachable ();
7255 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
7256 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
7258 for (i = 1; i < nops; i++)
7259 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
7260 reg_names[regs[i]]);
7262 strcat (buf, "}\t%@ phole stm");
7264 output_asm_insn (buf, operands);
7265 return "";
7268 /* Routines for use in generating RTL. */
7271 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
7272 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7274 HOST_WIDE_INT offset = *offsetp;
7275 int i = 0, j;
7276 rtx result;
7277 int sign = up ? 1 : -1;
7278 rtx mem, addr;
7280 /* XScale has load-store double instructions, but they have stricter
7281 alignment requirements than load-store multiple, so we cannot
7282 use them.
7284 For XScale ldm requires 2 + NREGS cycles to complete and blocks
7285 the pipeline until completion.
7287 NREGS CYCLES
7288 1 3
7289 2 4
7290 3 5
7291 4 6
7293 An ldr instruction takes 1-3 cycles, but does not block the
7294 pipeline.
7296 NREGS CYCLES
7297 1 1-3
7298 2 2-6
7299 3 3-9
7300 4 4-12
7302 Best case ldr will always win. However, the more ldr instructions
7303 we issue, the less likely we are to be able to schedule them well.
7304 Using ldr instructions also increases code size.
7306 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
7307 for counts of 3 or 4 regs. */
7308 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7310 rtx seq;
7312 start_sequence ();
7314 for (i = 0; i < count; i++)
7316 addr = plus_constant (from, i * 4 * sign);
7317 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7318 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
7319 offset += 4 * sign;
7322 if (write_back)
7324 emit_move_insn (from, plus_constant (from, count * 4 * sign));
7325 *offsetp = offset;
7328 seq = get_insns ();
7329 end_sequence ();
7331 return seq;
7334 result = gen_rtx_PARALLEL (VOIDmode,
7335 rtvec_alloc (count + (write_back ? 1 : 0)));
7336 if (write_back)
7338 XVECEXP (result, 0, 0)
7339 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign));
7340 i = 1;
7341 count++;
7344 for (j = 0; i < count; i++, j++)
7346 addr = plus_constant (from, j * 4 * sign);
7347 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7348 XVECEXP (result, 0, i)
7349 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
7350 offset += 4 * sign;
7353 if (write_back)
7354 *offsetp = offset;
7356 return result;
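/* Roughly, a call with BASE_REGNO == 0, COUNT == 2, UP and WRITE_BACK set
   builds a PARALLEL of the form
     (parallel [(set from (plus from (const_int 8)))
                (set (reg:SI 0) (mem:SI from))
                (set (reg:SI 1) (mem:SI (plus from (const_int 4))))])
   which is later matched by the load-multiple patterns.  */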
7360 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
7361 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
7363 HOST_WIDE_INT offset = *offsetp;
7364 int i = 0, j;
7365 rtx result;
7366 int sign = up ? 1 : -1;
7367 rtx mem, addr;
7369 /* See arm_gen_load_multiple for discussion of
7370 the pros/cons of ldm/stm usage for XScale. */
7371 if (arm_tune_xscale && count <= 2 && ! optimize_size)
7373 rtx seq;
7375 start_sequence ();
7377 for (i = 0; i < count; i++)
7379 addr = plus_constant (to, i * 4 * sign);
7380 mem = adjust_automodify_address (basemem, SImode, addr, offset);
7381 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
7382 offset += 4 * sign;
7385 if (write_back)
7387 emit_move_insn (to, plus_constant (to, count * 4 * sign));
7388 *offsetp = offset;
7391 seq = get_insns ();
7392 end_sequence ();
7394 return seq;
7397 result = gen_rtx_PARALLEL (VOIDmode,
7398 rtvec_alloc (count + (write_back ? 1 : 0)));
7399 if (write_back)
7401 XVECEXP (result, 0, 0)
7402 = gen_rtx_SET (VOIDmode, to,
7403 plus_constant (to, count * 4 * sign));
7404 i = 1;
7405 count++;
7408 for (j = 0; i < count; i++, j++)
7410 addr = plus_constant (to, j * 4 * sign);
7411 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
7412 XVECEXP (result, 0, i)
7413 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
7414 offset += 4 * sign;
7417 if (write_back)
7418 *offsetp = offset;
7420 return result;
7424 arm_gen_movmemqi (rtx *operands)
7426 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
7427 HOST_WIDE_INT srcoffset, dstoffset;
7428 int i;
7429 rtx src, dst, srcbase, dstbase;
7430 rtx part_bytes_reg = NULL;
7431 rtx mem;
7433 if (GET_CODE (operands[2]) != CONST_INT
7434 || GET_CODE (operands[3]) != CONST_INT
7435 || INTVAL (operands[2]) > 64
7436 || INTVAL (operands[3]) & 3)
7437 return 0;
7439 dstbase = operands[0];
7440 srcbase = operands[1];
7442 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
7443 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
7445 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
7446 out_words_to_go = INTVAL (operands[2]) / 4;
7447 last_bytes = INTVAL (operands[2]) & 3;
7448 dstoffset = srcoffset = 0;
7450 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
7451 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
7453 for (i = 0; in_words_to_go >= 2; i+=4)
7455 if (in_words_to_go > 4)
7456 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
7457 srcbase, &srcoffset));
7458 else
7459 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
7460 FALSE, srcbase, &srcoffset));
7462 if (out_words_to_go)
7464 if (out_words_to_go > 4)
7465 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
7466 dstbase, &dstoffset));
7467 else if (out_words_to_go != 1)
7468 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
7469 dst, TRUE,
7470 (last_bytes == 0
7471 ? FALSE : TRUE),
7472 dstbase, &dstoffset));
7473 else
7475 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7476 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
7477 if (last_bytes != 0)
7479 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
7480 dstoffset += 4;
7485 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
7486 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
7489 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
7490 if (out_words_to_go)
7492 rtx sreg;
7494 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7495 sreg = copy_to_reg (mem);
7497 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
7498 emit_move_insn (mem, sreg);
7499 in_words_to_go--;
7501 gcc_assert (!in_words_to_go); /* Sanity check */
7504 if (in_words_to_go)
7506 gcc_assert (in_words_to_go > 0);
7508 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
7509 part_bytes_reg = copy_to_mode_reg (SImode, mem);
7512 gcc_assert (!last_bytes || part_bytes_reg);
7514 if (BYTES_BIG_ENDIAN && last_bytes)
7516 rtx tmp = gen_reg_rtx (SImode);
7518 /* The bytes we want are in the top end of the word. */
7519 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
7520 GEN_INT (8 * (4 - last_bytes))));
7521 part_bytes_reg = tmp;
7523 while (last_bytes)
7525 mem = adjust_automodify_address (dstbase, QImode,
7526 plus_constant (dst, last_bytes - 1),
7527 dstoffset + last_bytes - 1);
7528 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7530 if (--last_bytes)
7532 tmp = gen_reg_rtx (SImode);
7533 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
7534 part_bytes_reg = tmp;
7539 else
7541 if (last_bytes > 1)
7543 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
7544 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
7545 last_bytes -= 2;
7546 if (last_bytes)
7548 rtx tmp = gen_reg_rtx (SImode);
7549 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
7550 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
7551 part_bytes_reg = tmp;
7552 dstoffset += 2;
7556 if (last_bytes)
7558 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
7559 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
7563 return 1;
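/* As a concrete case, a 16-byte word-aligned copy (operands[2] == 16,
   operands[3] == 4) is expanded by the code above into a single 4-register
   load-multiple from the source into r0-r3 followed by a 4-register
   store-multiple to the destination, with no trailing byte stores.  */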
7566 /* Select a dominance comparison mode if possible for a test of the general
7567 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
7568 COND_OR == DOM_CC_X_AND_Y => (X && Y)
7569 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
7570 COND_OR == DOM_CC_X_OR_Y => (X || Y)
7571 In all cases OP will be either EQ or NE, but we don't need to know which
7572 here. If we are unable to support a dominance comparison we return
7573 CC mode. This will then fail to match for the RTL expressions that
7574 generate this call. */
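/* For example, a conditional compare for (a == b && c == d) arrives here
   with COND_OR == DOM_CC_X_AND_Y and both comparisons EQ, giving CC_DEQmode;
   EQ combined with LE under DOM_CC_X_OR_Y gives CC_DLEmode, since EQ
   implies LE.  */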
7575 enum machine_mode
7576 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
7578 enum rtx_code cond1, cond2;
7579 int swapped = 0;
7581 /* Currently we will probably get the wrong result if the individual
7582 comparisons are not simple. This also ensures that it is safe to
7583 reverse a comparison if necessary. */
7584 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
7585 != CCmode)
7586 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
7587 != CCmode))
7588 return CCmode;
7590 /* The if_then_else variant of this tests the second condition if the
7591 first passes, but is true if the first fails. Reverse the first
7592 condition to get a true "inclusive-or" expression. */
7593 if (cond_or == DOM_CC_NX_OR_Y)
7594 cond1 = reverse_condition (cond1);
7596 /* If the comparisons are not equal, and one doesn't dominate the other,
7597 then we can't do this. */
7598 if (cond1 != cond2
7599 && !comparison_dominates_p (cond1, cond2)
7600 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
7601 return CCmode;
7603 if (swapped)
7605 enum rtx_code temp = cond1;
7606 cond1 = cond2;
7607 cond2 = temp;
7610 switch (cond1)
7612 case EQ:
7613 if (cond_or == DOM_CC_X_AND_Y)
7614 return CC_DEQmode;
7616 switch (cond2)
7618 case EQ: return CC_DEQmode;
7619 case LE: return CC_DLEmode;
7620 case LEU: return CC_DLEUmode;
7621 case GE: return CC_DGEmode;
7622 case GEU: return CC_DGEUmode;
7623 default: gcc_unreachable ();
7626 case LT:
7627 if (cond_or == DOM_CC_X_AND_Y)
7628 return CC_DLTmode;
7630 switch (cond2)
7632 case LT:
7633 return CC_DLTmode;
7634 case LE:
7635 return CC_DLEmode;
7636 case NE:
7637 return CC_DNEmode;
7638 default:
7639 gcc_unreachable ();
7642 case GT:
7643 if (cond_or == DOM_CC_X_AND_Y)
7644 return CC_DGTmode;
7646 switch (cond2)
7648 case GT:
7649 return CC_DGTmode;
7650 case GE:
7651 return CC_DGEmode;
7652 case NE:
7653 return CC_DNEmode;
7654 default:
7655 gcc_unreachable ();
7658 case LTU:
7659 if (cond_or == DOM_CC_X_AND_Y)
7660 return CC_DLTUmode;
7662 switch (cond2)
7664 case LTU:
7665 return CC_DLTUmode;
7666 case LEU:
7667 return CC_DLEUmode;
7668 case NE:
7669 return CC_DNEmode;
7670 default:
7671 gcc_unreachable ();
7674 case GTU:
7675 if (cond_or == DOM_CC_X_AND_Y)
7676 return CC_DGTUmode;
7678 switch (cond2)
7680 case GTU:
7681 return CC_DGTUmode;
7682 case GEU:
7683 return CC_DGEUmode;
7684 case NE:
7685 return CC_DNEmode;
7686 default:
7687 gcc_unreachable ();
7690 /* The remaining cases only occur when both comparisons are the
7691 same. */
7692 case NE:
7693 gcc_assert (cond1 == cond2);
7694 return CC_DNEmode;
7696 case LE:
7697 gcc_assert (cond1 == cond2);
7698 return CC_DLEmode;
7700 case GE:
7701 gcc_assert (cond1 == cond2);
7702 return CC_DGEmode;
7704 case LEU:
7705 gcc_assert (cond1 == cond2);
7706 return CC_DLEUmode;
7708 case GEU:
7709 gcc_assert (cond1 == cond2);
7710 return CC_DGEUmode;
7712 default:
7713 gcc_unreachable ();
7717 enum machine_mode
7718 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
7720 /* All floating point compares return CCFP if it is an equality
7721 comparison, and CCFPE otherwise. */
7722 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
7724 switch (op)
7726 case EQ:
7727 case NE:
7728 case UNORDERED:
7729 case ORDERED:
7730 case UNLT:
7731 case UNLE:
7732 case UNGT:
7733 case UNGE:
7734 case UNEQ:
7735 case LTGT:
7736 return CCFPmode;
7738 case LT:
7739 case LE:
7740 case GT:
7741 case GE:
7742 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
7743 return CCFPmode;
7744 return CCFPEmode;
7746 default:
7747 gcc_unreachable ();
7751 /* A compare with a shifted operand. Because of canonicalization, the
7752 comparison will have to be swapped when we emit the assembler. */
7753 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
7754 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7755 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
7756 || GET_CODE (x) == ROTATERT))
7757 return CC_SWPmode;
7759 /* This operation is performed swapped, but since we only rely on the Z
7760 flag we don't need an additional mode. */
7761 if (GET_MODE (y) == SImode && REG_P (y)
7762 && GET_CODE (x) == NEG
7763 && (op == EQ || op == NE))
7764 return CC_Zmode;
7766 /* This is a special case that is used by combine to allow a
7767 comparison of a shifted byte load to be split into a zero-extend
7768 followed by a comparison of the shifted integer (only valid for
7769 equalities and unsigned inequalities). */
7770 if (GET_MODE (x) == SImode
7771 && GET_CODE (x) == ASHIFT
7772 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
7773 && GET_CODE (XEXP (x, 0)) == SUBREG
7774 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
7775 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
7776 && (op == EQ || op == NE
7777 || op == GEU || op == GTU || op == LTU || op == LEU)
7778 && GET_CODE (y) == CONST_INT)
7779 return CC_Zmode;
7781 /* A construct for a conditional compare: if the false arm contains
7782 0, then both conditions must be true; otherwise either condition
7783 must be true. Not all conditions are possible, so CCmode is
7784 returned if it can't be done. */
7785 if (GET_CODE (x) == IF_THEN_ELSE
7786 && (XEXP (x, 2) == const0_rtx
7787 || XEXP (x, 2) == const1_rtx)
7788 && COMPARISON_P (XEXP (x, 0))
7789 && COMPARISON_P (XEXP (x, 1)))
7790 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7791 INTVAL (XEXP (x, 2)));
7793 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
7794 if (GET_CODE (x) == AND
7795 && COMPARISON_P (XEXP (x, 0))
7796 && COMPARISON_P (XEXP (x, 1)))
7797 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7798 DOM_CC_X_AND_Y);
7800 if (GET_CODE (x) == IOR
7801 && COMPARISON_P (XEXP (x, 0))
7802 && COMPARISON_P (XEXP (x, 1)))
7803 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
7804 DOM_CC_X_OR_Y);
7806 /* An operation (on Thumb) where we want to test for a single bit.
7807 This is done by shifting that bit up into the top bit of a
7808 scratch register; we can then branch on the sign bit. */
7809 if (TARGET_THUMB1
7810 && GET_MODE (x) == SImode
7811 && (op == EQ || op == NE)
7812 && GET_CODE (x) == ZERO_EXTRACT
7813 && XEXP (x, 1) == const1_rtx)
7814 return CC_Nmode;
7816 /* For an operation that sets the condition codes as a side-effect, the
7817 V flag is not set correctly, so we can only use comparisons where
7818 this doesn't matter. (For LT and GE we can use "mi" and "pl"
7819 instead.) */
7820 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
7821 if (GET_MODE (x) == SImode
7822 && y == const0_rtx
7823 && (op == EQ || op == NE || op == LT || op == GE)
7824 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
7825 || GET_CODE (x) == AND || GET_CODE (x) == IOR
7826 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
7827 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
7828 || GET_CODE (x) == LSHIFTRT
7829 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
7830 || GET_CODE (x) == ROTATERT
7831 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
7832 return CC_NOOVmode;
7834 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
7835 return CC_Zmode;
7837 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
7838 && GET_CODE (x) == PLUS
7839 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
7840 return CC_Cmode;
7842 return CCmode;
7845 /* X and Y are two things to compare using CODE. Emit the compare insn and
7846 return the rtx for register 0 in the proper mode. FP means this is a
7847 floating point compare: I don't think that it is needed on the arm. */
7849 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
7851 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
7852 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
7854 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
7856 return cc_reg;
7859 /* Generate a sequence of insns that will generate the correct return
7860 address mask depending on the physical architecture that the program
7861 is running on. */
7863 arm_gen_return_addr_mask (void)
7865 rtx reg = gen_reg_rtx (Pmode);
7867 emit_insn (gen_return_addr_mask (reg));
7868 return reg;
7871 void
7872 arm_reload_in_hi (rtx *operands)
7874 rtx ref = operands[1];
7875 rtx base, scratch;
7876 HOST_WIDE_INT offset = 0;
7878 if (GET_CODE (ref) == SUBREG)
7880 offset = SUBREG_BYTE (ref);
7881 ref = SUBREG_REG (ref);
7884 if (GET_CODE (ref) == REG)
7886 /* We have a pseudo which has been spilt onto the stack; there
7887 are two cases here: the first where there is a simple
7888 stack-slot replacement and a second where the stack-slot is
7889 out of range, or is used as a subreg. */
7890 if (reg_equiv_mem[REGNO (ref)])
7892 ref = reg_equiv_mem[REGNO (ref)];
7893 base = find_replacement (&XEXP (ref, 0));
7895 else
7896 /* The slot is out of range, or was dressed up in a SUBREG. */
7897 base = reg_equiv_address[REGNO (ref)];
7899 else
7900 base = find_replacement (&XEXP (ref, 0));
7902 /* Handle the case where the address is too complex to be offset by 1. */
7903 if (GET_CODE (base) == MINUS
7904 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
7906 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7908 emit_set_insn (base_plus, base);
7909 base = base_plus;
7911 else if (GET_CODE (base) == PLUS)
7913 /* The addend must be CONST_INT, or we would have dealt with it above. */
7914 HOST_WIDE_INT hi, lo;
7916 offset += INTVAL (XEXP (base, 1));
7917 base = XEXP (base, 0);
7919 /* Rework the address into a legal sequence of insns. */
7920 /* Valid range for lo is -4095 -> 4095 */
7921 lo = (offset >= 0
7922 ? (offset & 0xfff)
7923 : -((-offset) & 0xfff));
7925 /* Corner case: if lo is the max offset then we would be out of range
7926 once we have added the additional 1 below, so bump the msb into the
7927 pre-loading insn(s). */
7928 if (lo == 4095)
7929 lo &= 0x7ff;
7931 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
7932 ^ (HOST_WIDE_INT) 0x80000000)
7933 - (HOST_WIDE_INT) 0x80000000);
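/* Worked example (illustrative, not from the original source): with
   offset = 4100 the split gives lo = 4 and hi = 4096, so HI is folded into
   the base register and the remaining displacement stays within the
   +/-4095 load range.  With offset = 4095 the corner case above yields
   lo = 2047 and hi = 2048, keeping "offset + 1" in range.  The XOR/subtract
   of 0x80000000 merely sign-extends HI from 32 bits, so e.g. offset = -4097
   splits into lo = -1 and hi = -4096.  */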
7935 gcc_assert (hi + lo == offset);
7937 if (hi != 0)
7939 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7941 /* Get the base address; addsi3 knows how to handle constants
7942 that require more than one insn. */
7943 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
7944 base = base_plus;
7945 offset = lo;
7949 /* Operands[2] may overlap operands[0] (though it won't overlap
7950 operands[1]); that's why we asked for a DImode reg -- so we can
7951 use the half that does not overlap. */
7952 if (REGNO (operands[2]) == REGNO (operands[0]))
7953 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
7954 else
7955 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
7957 emit_insn (gen_zero_extendqisi2 (scratch,
7958 gen_rtx_MEM (QImode,
7959 plus_constant (base,
7960 offset))));
7961 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
7962 gen_rtx_MEM (QImode,
7963 plus_constant (base,
7964 offset + 1))));
7965 if (!BYTES_BIG_ENDIAN)
7966 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7967 gen_rtx_IOR (SImode,
7968 gen_rtx_ASHIFT
7969 (SImode,
7970 gen_rtx_SUBREG (SImode, operands[0], 0),
7971 GEN_INT (8)),
7972 scratch));
7973 else
7974 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
7975 gen_rtx_IOR (SImode,
7976 gen_rtx_ASHIFT (SImode, scratch,
7977 GEN_INT (8)),
7978 gen_rtx_SUBREG (SImode, operands[0], 0)));
7981 /* Handle storing a half-word to memory during reload by synthesizing it as two
7982 byte stores. Take care not to clobber the input values until after we
7983 have moved them somewhere safe. This code assumes that if the DImode
7984 scratch in operands[2] overlaps either the input value or output address
7985 in some way, then that value must die in this insn (we absolutely need
7986 two scratch registers for some corner cases). */
7987 void
7988 arm_reload_out_hi (rtx *operands)
7990 rtx ref = operands[0];
7991 rtx outval = operands[1];
7992 rtx base, scratch;
7993 HOST_WIDE_INT offset = 0;
7995 if (GET_CODE (ref) == SUBREG)
7997 offset = SUBREG_BYTE (ref);
7998 ref = SUBREG_REG (ref);
8001 if (GET_CODE (ref) == REG)
8003 /* We have a pseudo which has been spilt onto the stack; there
8004 are two cases here: the first where there is a simple
8005 stack-slot replacement and a second where the stack-slot is
8006 out of range, or is used as a subreg. */
8007 if (reg_equiv_mem[REGNO (ref)])
8009 ref = reg_equiv_mem[REGNO (ref)];
8010 base = find_replacement (&XEXP (ref, 0));
8012 else
8013 /* The slot is out of range, or was dressed up in a SUBREG. */
8014 base = reg_equiv_address[REGNO (ref)];
8016 else
8017 base = find_replacement (&XEXP (ref, 0));
8019 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
8021 /* Handle the case where the address is too complex to be offset by 1. */
8022 if (GET_CODE (base) == MINUS
8023 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
8025 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8027 /* Be careful not to destroy OUTVAL. */
8028 if (reg_overlap_mentioned_p (base_plus, outval))
8030 /* Updating base_plus might destroy outval; see if we can
8031 swap the scratch and base_plus. */
8032 if (!reg_overlap_mentioned_p (scratch, outval))
8034 rtx tmp = scratch;
8035 scratch = base_plus;
8036 base_plus = tmp;
8038 else
8040 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8042 /* Be conservative and copy OUTVAL into the scratch now;
8043 this should only be necessary if outval is a subreg
8044 of something larger than a word. */
8045 /* XXX Might this clobber base? I can't see how it can,
8046 since scratch is known to overlap with OUTVAL, and
8047 must be wider than a word. */
8048 emit_insn (gen_movhi (scratch_hi, outval));
8049 outval = scratch_hi;
8053 emit_set_insn (base_plus, base);
8054 base = base_plus;
8056 else if (GET_CODE (base) == PLUS)
8058 /* The addend must be CONST_INT, or we would have dealt with it above. */
8059 HOST_WIDE_INT hi, lo;
8061 offset += INTVAL (XEXP (base, 1));
8062 base = XEXP (base, 0);
8064 /* Rework the address into a legal sequence of insns. */
8065 /* Valid range for lo is -4095 -> 4095 */
8066 lo = (offset >= 0
8067 ? (offset & 0xfff)
8068 : -((-offset) & 0xfff));
8070 /* Corner case: if lo is the max offset then we would be out of range
8071 once we have added the additional 1 below, so bump the msb into the
8072 pre-loading insn(s). */
8073 if (lo == 4095)
8074 lo &= 0x7ff;
8076 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
8077 ^ (HOST_WIDE_INT) 0x80000000)
8078 - (HOST_WIDE_INT) 0x80000000);
8080 gcc_assert (hi + lo == offset);
8082 if (hi != 0)
8084 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
8086 /* Be careful not to destroy OUTVAL. */
8087 if (reg_overlap_mentioned_p (base_plus, outval))
8089 /* Updating base_plus might destroy outval; see if we
8090 can swap the scratch and base_plus. */
8091 if (!reg_overlap_mentioned_p (scratch, outval))
8093 rtx tmp = scratch;
8094 scratch = base_plus;
8095 base_plus = tmp;
8097 else
8099 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
8101 /* Be conservative and copy outval into scratch now;
8102 this should only be necessary if outval is a
8103 subreg of something larger than a word. */
8104 /* XXX Might this clobber base? I can't see how it
8105 can, since scratch is known to overlap with
8106 outval. */
8107 emit_insn (gen_movhi (scratch_hi, outval));
8108 outval = scratch_hi;
8112 /* Get the base address; addsi3 knows how to handle constants
8113 that require more than one insn. */
8114 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
8115 base = base_plus;
8116 offset = lo;
8120 if (BYTES_BIG_ENDIAN)
8122 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8123 plus_constant (base, offset + 1)),
8124 gen_lowpart (QImode, outval)));
8125 emit_insn (gen_lshrsi3 (scratch,
8126 gen_rtx_SUBREG (SImode, outval, 0),
8127 GEN_INT (8)));
8128 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8129 gen_lowpart (QImode, scratch)));
8131 else
8133 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
8134 gen_lowpart (QImode, outval)));
8135 emit_insn (gen_lshrsi3 (scratch,
8136 gen_rtx_SUBREG (SImode, outval, 0),
8137 GEN_INT (8)));
8138 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
8139 plus_constant (base, offset + 1)),
8140 gen_lowpart (QImode, scratch)));
8144 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
8145 (padded to the size of a word) should be passed in a register. */
8147 static bool
8148 arm_must_pass_in_stack (enum machine_mode mode, const_tree type)
8150 if (TARGET_AAPCS_BASED)
8151 return must_pass_in_stack_var_size (mode, type);
8152 else
8153 return must_pass_in_stack_var_size_or_pad (mode, type);
8157 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
8158 Return true if an argument passed on the stack should be padded upwards,
8159 i.e. if the least-significant byte has useful data.
8160 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
8161 aggregate types are placed in the lowest memory address. */
8163 bool
8164 arm_pad_arg_upward (enum machine_mode mode, const_tree type)
8166 if (!TARGET_AAPCS_BASED)
8167 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
8169 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
8170 return false;
8172 return true;
8176 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
8177 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
8178 byte of the register has useful data, and return the opposite if the
8179 most significant byte does.
8180 For AAPCS, small aggregates and small complex types are always padded
8181 upwards. */
8183 bool
8184 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
8185 tree type, int first ATTRIBUTE_UNUSED)
8187 if (TARGET_AAPCS_BASED
8188 && BYTES_BIG_ENDIAN
8189 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
8190 && int_size_in_bytes (type) <= 4)
8191 return true;
8193 /* Otherwise, use default padding. */
8194 return !BYTES_BIG_ENDIAN;
8198 /* Print a symbolic form of X to the debug file, F. */
8199 static void
8200 arm_print_value (FILE *f, rtx x)
8202 switch (GET_CODE (x))
8204 case CONST_INT:
8205 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
8206 return;
8208 case CONST_DOUBLE:
8209 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
8210 return;
8212 case CONST_VECTOR:
8214 int i;
8216 fprintf (f, "<");
8217 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
8219 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
8220 if (i < (CONST_VECTOR_NUNITS (x) - 1))
8221 fputc (',', f);
8223 fprintf (f, ">");
8225 return;
8227 case CONST_STRING:
8228 fprintf (f, "\"%s\"", XSTR (x, 0));
8229 return;
8231 case SYMBOL_REF:
8232 fprintf (f, "`%s'", XSTR (x, 0));
8233 return;
8235 case LABEL_REF:
8236 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
8237 return;
8239 case CONST:
8240 arm_print_value (f, XEXP (x, 0));
8241 return;
8243 case PLUS:
8244 arm_print_value (f, XEXP (x, 0));
8245 fprintf (f, "+");
8246 arm_print_value (f, XEXP (x, 1));
8247 return;
8249 case PC:
8250 fprintf (f, "pc");
8251 return;
8253 default:
8254 fprintf (f, "????");
8255 return;
8259 /* Routines for manipulation of the constant pool. */
8261 /* Arm instructions cannot load a large constant directly into a
8262 register; they have to come from a pc relative load. The constant
8263 must therefore be placed in the addressable range of the pc
8264 relative load. Depending on the precise pc relative load
8265 instruction the range is somewhere between 256 bytes and 4k. This
8266 means that we often have to dump a constant inside a function, and
8267 generate code to branch around it.
8269 It is important to minimize this, since the branches will slow
8270 things down and make the code larger.
8272 Normally we can hide the table after an existing unconditional
8273 branch so that there is no interruption of the flow, but in the
8274 worst case the code looks like this:
8276 ldr rn, L1
8278 b L2
8279 align
8280 L1: .long value
8284 ldr rn, L3
8286 b L4
8287 align
8288 L3: .long value
8292 We fix this by performing a scan after scheduling, which notices
8293 which instructions need to have their operands fetched from the
8294 constant table and builds the table.
8296 The algorithm starts by building a table of all the constants that
8297 need fixing up and all the natural barriers in the function (places
8298 where a constant table can be dropped without breaking the flow).
8299 For each fixup we note how far the pc-relative replacement will be
8300 able to reach and the offset of the instruction into the function.
8302 Having built the table we then group the fixes together to form
8303 tables that are as large as possible (subject to addressing
8304 constraints) and emit each table of constants after the last
8305 barrier that is within range of all the instructions in the group.
8306 If a group does not contain a barrier, then we forcibly create one
8307 by inserting a jump instruction into the flow. Once the table has
8308 been inserted, the insns are then modified to reference the
8309 relevant entry in the pool.
8311 Possible enhancements to the algorithm (not implemented) are:
8313 1) For some processors and object formats, there may be benefit in
8314 aligning the pools to the start of cache lines; this alignment
8315 would need to be taken into account when calculating addressability
8316 of a pool. */
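/* Illustrative sketch of the transformation (the assembly is schematic,
   not literal compiler output): an insn such as

	ldr	r3, =0x12345678

   that cannot encode its constant directly is rewritten by the scan to
   load from a nearby pool, with a branch inserted only when no natural
   barrier is within range:

	ldr	r3, .LCP0
	...
	b	.Lskip
	.align	2
   .LCP0:
	.word	0x12345678
   .Lskip:
	...  */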
8318 /* These typedefs are located at the start of this file, so that
8319 they can be used in the prototypes there. This comment is to
8320 remind readers of that fact so that the following structures
8321 can be understood more easily.
8323 typedef struct minipool_node Mnode;
8324 typedef struct minipool_fixup Mfix; */
8326 struct minipool_node
8328 /* Doubly linked chain of entries. */
8329 Mnode * next;
8330 Mnode * prev;
8331 /* The maximum offset into the code at which this entry can be placed. While
8332 pushing fixes for forward references, all entries are sorted in order
8333 of increasing max_address. */
8334 HOST_WIDE_INT max_address;
8335 /* Similarly for an entry inserted for a backwards ref. */
8336 HOST_WIDE_INT min_address;
8337 /* The number of fixes referencing this entry. This can become zero
8338 if we "unpush" an entry. In this case we ignore the entry when we
8339 come to emit the code. */
8340 int refcount;
8341 /* The offset from the start of the minipool. */
8342 HOST_WIDE_INT offset;
8343 /* The value in the table. */
8344 rtx value;
8345 /* The mode of value. */
8346 enum machine_mode mode;
8347 /* The size of the value. With iWMMXt enabled
8348 sizes > 4 also imply an alignment of 8 bytes. */
8349 int fix_size;
8352 struct minipool_fixup
8354 Mfix * next;
8355 rtx insn;
8356 HOST_WIDE_INT address;
8357 rtx * loc;
8358 enum machine_mode mode;
8359 int fix_size;
8360 rtx value;
8361 Mnode * minipool;
8362 HOST_WIDE_INT forwards;
8363 HOST_WIDE_INT backwards;
8366 /* Fixes less than a word need padding out to a word boundary. */
8367 #define MINIPOOL_FIX_SIZE(mode) \
8368 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
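/* For example (illustrative): a QImode or HImode fix still occupies 4
   bytes in the pool, while DImode and TImode fixes keep their natural
   sizes of 8 and 16 bytes respectively.  */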
8370 static Mnode * minipool_vector_head;
8371 static Mnode * minipool_vector_tail;
8372 static rtx minipool_vector_label;
8373 static int minipool_pad;
8375 /* The linked list of all minipool fixes required for this function. */
8376 Mfix * minipool_fix_head;
8377 Mfix * minipool_fix_tail;
8378 /* The fix entry for the current minipool, once it has been placed. */
8379 Mfix * minipool_barrier;
8381 /* Determines if INSN is the start of a jump table. Returns the end
8382 of the TABLE or NULL_RTX. */
8383 static rtx
8384 is_jump_table (rtx insn)
8386 rtx table;
8388 if (GET_CODE (insn) == JUMP_INSN
8389 && JUMP_LABEL (insn) != NULL
8390 && ((table = next_real_insn (JUMP_LABEL (insn)))
8391 == next_real_insn (insn))
8392 && table != NULL
8393 && GET_CODE (table) == JUMP_INSN
8394 && (GET_CODE (PATTERN (table)) == ADDR_VEC
8395 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
8396 return table;
8398 return NULL_RTX;
8401 #ifndef JUMP_TABLES_IN_TEXT_SECTION
8402 #define JUMP_TABLES_IN_TEXT_SECTION 0
8403 #endif
8405 static HOST_WIDE_INT
8406 get_jump_table_size (rtx insn)
8408 /* ADDR_VECs only take room if read-only data goes into the text
8409 section. */
8410 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
8412 rtx body = PATTERN (insn);
8413 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
8414 HOST_WIDE_INT size;
8415 HOST_WIDE_INT modesize;
8417 modesize = GET_MODE_SIZE (GET_MODE (body));
8418 size = modesize * XVECLEN (body, elt);
8419 switch (modesize)
8421 case 1:
8422 /* Round up size of TBB table to a halfword boundary. */
8423 size = (size + 1) & ~(HOST_WIDE_INT)1;
8424 break;
8425 case 2:
8426 /* No padding necessary for TBH. */
8427 break;
8428 case 4:
8429 /* Add two bytes for alignment on Thumb. */
8430 if (TARGET_THUMB)
8431 size += 2;
8432 break;
8433 default:
8434 gcc_unreachable ();
8436 return size;
8439 return 0;
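/* Illustrative examples of the sizes computed above (assuming the table is
   emitted into the text section): a 5-entry TBB-style table (QImode
   entries) takes 5 bytes, rounded up to 6; a 5-entry TBH-style table
   (HImode) takes 10 bytes; and a 5-entry word table takes 20 bytes, plus
   2 bytes of alignment padding on Thumb.  */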
8442 /* Move a minipool fix MP from its current location to before MAX_MP.
8443 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
8444 constraints may need updating. */
8445 static Mnode *
8446 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
8447 HOST_WIDE_INT max_address)
8449 /* The code below assumes these are different. */
8450 gcc_assert (mp != max_mp);
8452 if (max_mp == NULL)
8454 if (max_address < mp->max_address)
8455 mp->max_address = max_address;
8457 else
8459 if (max_address > max_mp->max_address - mp->fix_size)
8460 mp->max_address = max_mp->max_address - mp->fix_size;
8461 else
8462 mp->max_address = max_address;
8464 /* Unlink MP from its current position. Since max_mp is non-null,
8465 mp->prev must be non-null. */
8466 mp->prev->next = mp->next;
8467 if (mp->next != NULL)
8468 mp->next->prev = mp->prev;
8469 else
8470 minipool_vector_tail = mp->prev;
8472 /* Re-insert it before MAX_MP. */
8473 mp->next = max_mp;
8474 mp->prev = max_mp->prev;
8475 max_mp->prev = mp;
8477 if (mp->prev != NULL)
8478 mp->prev->next = mp;
8479 else
8480 minipool_vector_head = mp;
8483 /* Save the new entry. */
8484 max_mp = mp;
8486 /* Scan over the preceding entries and adjust their addresses as
8487 required. */
8488 while (mp->prev != NULL
8489 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8491 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8492 mp = mp->prev;
8495 return max_mp;
8498 /* Add a constant to the minipool for a forward reference. Returns the
8499 node added or NULL if the constant will not fit in this pool. */
8500 static Mnode *
8501 add_minipool_forward_ref (Mfix *fix)
8503 /* If set, max_mp is the first pool_entry that has a lower
8504 constraint than the one we are trying to add. */
8505 Mnode * max_mp = NULL;
8506 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
8507 Mnode * mp;
8509 /* If the minipool starts before the end of FIX->INSN then this FIX
8510 cannot be placed into the current pool. Furthermore, adding the
8511 new constant pool entry may cause the pool to start FIX_SIZE bytes
8512 earlier. */
8513 if (minipool_vector_head &&
8514 (fix->address + get_attr_length (fix->insn)
8515 >= minipool_vector_head->max_address - fix->fix_size))
8516 return NULL;
8518 /* Scan the pool to see if a constant with the same value has
8519 already been added. While we are doing this, also note the
8520 location where we must insert the constant if it doesn't already
8521 exist. */
8522 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8524 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8525 && fix->mode == mp->mode
8526 && (GET_CODE (fix->value) != CODE_LABEL
8527 || (CODE_LABEL_NUMBER (fix->value)
8528 == CODE_LABEL_NUMBER (mp->value)))
8529 && rtx_equal_p (fix->value, mp->value))
8531 /* More than one fix references this entry. */
8532 mp->refcount++;
8533 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
8536 /* Note the insertion point if necessary. */
8537 if (max_mp == NULL
8538 && mp->max_address > max_address)
8539 max_mp = mp;
8541 /* If we are inserting an 8-byte aligned quantity and
8542 we have not already found an insertion point, then
8543 make sure that all such 8-byte aligned quantities are
8544 placed at the start of the pool. */
8545 if (ARM_DOUBLEWORD_ALIGN
8546 && max_mp == NULL
8547 && fix->fix_size >= 8
8548 && mp->fix_size < 8)
8550 max_mp = mp;
8551 max_address = mp->max_address;
8555 /* The value is not currently in the minipool, so we need to create
8556 a new entry for it. If MAX_MP is NULL, the entry will be put on
8557 the end of the list since the placement is less constrained than
8558 any existing entry. Otherwise, we insert the new fix before
8559 MAX_MP and, if necessary, adjust the constraints on the other
8560 entries. */
8561 mp = XNEW (Mnode);
8562 mp->fix_size = fix->fix_size;
8563 mp->mode = fix->mode;
8564 mp->value = fix->value;
8565 mp->refcount = 1;
8566 /* Not yet required for a backwards ref. */
8567 mp->min_address = -65536;
8569 if (max_mp == NULL)
8571 mp->max_address = max_address;
8572 mp->next = NULL;
8573 mp->prev = minipool_vector_tail;
8575 if (mp->prev == NULL)
8577 minipool_vector_head = mp;
8578 minipool_vector_label = gen_label_rtx ();
8580 else
8581 mp->prev->next = mp;
8583 minipool_vector_tail = mp;
8585 else
8587 if (max_address > max_mp->max_address - mp->fix_size)
8588 mp->max_address = max_mp->max_address - mp->fix_size;
8589 else
8590 mp->max_address = max_address;
8592 mp->next = max_mp;
8593 mp->prev = max_mp->prev;
8594 max_mp->prev = mp;
8595 if (mp->prev != NULL)
8596 mp->prev->next = mp;
8597 else
8598 minipool_vector_head = mp;
8601 /* Save the new entry. */
8602 max_mp = mp;
8604 /* Scan over the preceding entries and adjust their addresses as
8605 required. */
8606 while (mp->prev != NULL
8607 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
8609 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
8610 mp = mp->prev;
8613 return max_mp;
8616 static Mnode *
8617 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
8618 HOST_WIDE_INT min_address)
8620 HOST_WIDE_INT offset;
8622 /* The code below assumes these are different. */
8623 gcc_assert (mp != min_mp);
8625 if (min_mp == NULL)
8627 if (min_address > mp->min_address)
8628 mp->min_address = min_address;
8630 else
8632 /* We will adjust this below if it is too loose. */
8633 mp->min_address = min_address;
8635 /* Unlink MP from its current position. Since min_mp is non-null,
8636 mp->next must be non-null. */
8637 mp->next->prev = mp->prev;
8638 if (mp->prev != NULL)
8639 mp->prev->next = mp->next;
8640 else
8641 minipool_vector_head = mp->next;
8643 /* Reinsert it after MIN_MP. */
8644 mp->prev = min_mp;
8645 mp->next = min_mp->next;
8646 min_mp->next = mp;
8647 if (mp->next != NULL)
8648 mp->next->prev = mp;
8649 else
8650 minipool_vector_tail = mp;
8653 min_mp = mp;
8655 offset = 0;
8656 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8658 mp->offset = offset;
8659 if (mp->refcount > 0)
8660 offset += mp->fix_size;
8662 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
8663 mp->next->min_address = mp->min_address + mp->fix_size;
8666 return min_mp;
8669 /* Add a constant to the minipool for a backward reference. Returns the
8670 node added or NULL if the constant will not fit in this pool.
8672 Note that the insertion code for a backwards reference can be
8673 somewhat confusing because the calculated offsets for each fix do
8674 not take into account the size of the pool (which is still under
8675 construction). */
8676 static Mnode *
8677 add_minipool_backward_ref (Mfix *fix)
8679 /* If set, min_mp is the last pool_entry that has a lower constraint
8680 than the one we are trying to add. */
8681 Mnode *min_mp = NULL;
8682 /* This can be negative, since it is only a constraint. */
8683 HOST_WIDE_INT min_address = fix->address - fix->backwards;
8684 Mnode *mp;
8686 /* If we can't reach the current pool from this insn, or if we can't
8687 insert this entry at the end of the pool without pushing other
8688 fixes out of range, then we don't try. This ensures that we
8689 can't fail later on. */
8690 if (min_address >= minipool_barrier->address
8691 || (minipool_vector_tail->min_address + fix->fix_size
8692 >= minipool_barrier->address))
8693 return NULL;
8695 /* Scan the pool to see if a constant with the same value has
8696 already been added. While we are doing this, also note the
8697 location where we must insert the constant if it doesn't already
8698 exist. */
8699 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
8701 if (GET_CODE (fix->value) == GET_CODE (mp->value)
8702 && fix->mode == mp->mode
8703 && (GET_CODE (fix->value) != CODE_LABEL
8704 || (CODE_LABEL_NUMBER (fix->value)
8705 == CODE_LABEL_NUMBER (mp->value)))
8706 && rtx_equal_p (fix->value, mp->value)
8707 /* Check that there is enough slack to move this entry to the
8708 end of the table (this is conservative). */
8709 && (mp->max_address
8710 > (minipool_barrier->address
8711 + minipool_vector_tail->offset
8712 + minipool_vector_tail->fix_size)))
8714 mp->refcount++;
8715 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
8718 if (min_mp != NULL)
8719 mp->min_address += fix->fix_size;
8720 else
8722 /* Note the insertion point if necessary. */
8723 if (mp->min_address < min_address)
8725 /* For now, we do not allow the insertion of 8-byte alignment
8726 requiring nodes anywhere but at the start of the pool. */
8727 if (ARM_DOUBLEWORD_ALIGN
8728 && fix->fix_size >= 8 && mp->fix_size < 8)
8729 return NULL;
8730 else
8731 min_mp = mp;
8733 else if (mp->max_address
8734 < minipool_barrier->address + mp->offset + fix->fix_size)
8736 /* Inserting before this entry would push the fix beyond
8737 its maximum address (which can happen if we have
8738 re-located a forwards fix); force the new fix to come
8739 after it. */
8740 min_mp = mp;
8741 min_address = mp->min_address + fix->fix_size;
8743 /* If we are inserting an 8-byte aligned quantity and
8744 we have not already found an insertion point, then
8745 make sure that all such 8-byte aligned quantities are
8746 placed at the start of the pool. */
8747 else if (ARM_DOUBLEWORD_ALIGN
8748 && min_mp == NULL
8749 && fix->fix_size >= 8
8750 && mp->fix_size < 8)
8752 min_mp = mp;
8753 min_address = mp->min_address + fix->fix_size;
8758 /* We need to create a new entry. */
8759 mp = XNEW (Mnode);
8760 mp->fix_size = fix->fix_size;
8761 mp->mode = fix->mode;
8762 mp->value = fix->value;
8763 mp->refcount = 1;
8764 mp->max_address = minipool_barrier->address + 65536;
8766 mp->min_address = min_address;
8768 if (min_mp == NULL)
8770 mp->prev = NULL;
8771 mp->next = minipool_vector_head;
8773 if (mp->next == NULL)
8775 minipool_vector_tail = mp;
8776 minipool_vector_label = gen_label_rtx ();
8778 else
8779 mp->next->prev = mp;
8781 minipool_vector_head = mp;
8783 else
8785 mp->next = min_mp->next;
8786 mp->prev = min_mp;
8787 min_mp->next = mp;
8789 if (mp->next != NULL)
8790 mp->next->prev = mp;
8791 else
8792 minipool_vector_tail = mp;
8795 /* Save the new entry. */
8796 min_mp = mp;
8798 if (mp->prev)
8799 mp = mp->prev;
8800 else
8801 mp->offset = 0;
8803 /* Scan over the following entries and adjust their offsets. */
8804 while (mp->next != NULL)
8806 if (mp->next->min_address < mp->min_address + mp->fix_size)
8807 mp->next->min_address = mp->min_address + mp->fix_size;
8809 if (mp->refcount)
8810 mp->next->offset = mp->offset + mp->fix_size;
8811 else
8812 mp->next->offset = mp->offset;
8814 mp = mp->next;
8817 return min_mp;
8820 static void
8821 assign_minipool_offsets (Mfix *barrier)
8823 HOST_WIDE_INT offset = 0;
8824 Mnode *mp;
8826 minipool_barrier = barrier;
8828 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8830 mp->offset = offset;
8832 if (mp->refcount > 0)
8833 offset += mp->fix_size;
8837 /* Output the literal table. */
8838 static void
8839 dump_minipool (rtx scan)
8841 Mnode * mp;
8842 Mnode * nmp;
8843 int align64 = 0;
8845 if (ARM_DOUBLEWORD_ALIGN)
8846 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
8847 if (mp->refcount > 0 && mp->fix_size >= 8)
8849 align64 = 1;
8850 break;
8853 if (dump_file)
8854 fprintf (dump_file,
8855 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
8856 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
8858 scan = emit_label_after (gen_label_rtx (), scan);
8859 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
8860 scan = emit_label_after (minipool_vector_label, scan);
8862 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
8864 if (mp->refcount > 0)
8866 if (dump_file)
8868 fprintf (dump_file,
8869 ";; Offset %u, min %ld, max %ld ",
8870 (unsigned) mp->offset, (unsigned long) mp->min_address,
8871 (unsigned long) mp->max_address);
8872 arm_print_value (dump_file, mp->value);
8873 fputc ('\n', dump_file);
8876 switch (mp->fix_size)
8878 #ifdef HAVE_consttable_1
8879 case 1:
8880 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
8881 break;
8883 #endif
8884 #ifdef HAVE_consttable_2
8885 case 2:
8886 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
8887 break;
8889 #endif
8890 #ifdef HAVE_consttable_4
8891 case 4:
8892 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
8893 break;
8895 #endif
8896 #ifdef HAVE_consttable_8
8897 case 8:
8898 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
8899 break;
8901 #endif
8902 #ifdef HAVE_consttable_16
8903 case 16:
8904 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
8905 break;
8907 #endif
8908 default:
8909 gcc_unreachable ();
8913 nmp = mp->next;
8914 free (mp);
8917 minipool_vector_head = minipool_vector_tail = NULL;
8918 scan = emit_insn_after (gen_consttable_end (), scan);
8919 scan = emit_barrier_after (scan);
8922 /* Return the cost of forcibly inserting a barrier after INSN. */
8923 static int
8924 arm_barrier_cost (rtx insn)
8926 /* Basing the location of the pool on the loop depth is preferable,
8927 but at the moment, the basic block information seems to be
8928 corrupted by this stage of the compilation. */
8929 int base_cost = 50;
8930 rtx next = next_nonnote_insn (insn);
8932 if (next != NULL && GET_CODE (next) == CODE_LABEL)
8933 base_cost -= 20;
8935 switch (GET_CODE (insn))
8937 case CODE_LABEL:
8938 /* It will always be better to place the table before the label, rather
8939 than after it. */
8940 return 50;
8942 case INSN:
8943 case CALL_INSN:
8944 return base_cost;
8946 case JUMP_INSN:
8947 return base_cost - 10;
8949 default:
8950 return base_cost + 10;
8954 /* Find the best place in the insn stream in the range
8955 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
8956 Create the barrier by inserting a jump and add a new fix entry for
8957 it. */
8958 static Mfix *
8959 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
8961 HOST_WIDE_INT count = 0;
8962 rtx barrier;
8963 rtx from = fix->insn;
8964 /* The instruction after which we will insert the jump. */
8965 rtx selected = NULL;
8966 int selected_cost;
8967 /* The address at which the jump instruction will be placed. */
8968 HOST_WIDE_INT selected_address;
8969 Mfix * new_fix;
8970 HOST_WIDE_INT max_count = max_address - fix->address;
8971 rtx label = gen_label_rtx ();
8973 selected_cost = arm_barrier_cost (from);
8974 selected_address = fix->address;
8976 while (from && count < max_count)
8978 rtx tmp;
8979 int new_cost;
8981 /* This code shouldn't have been called if there was a natural barrier
8982 within range. */
8983 gcc_assert (GET_CODE (from) != BARRIER);
8985 /* Count the length of this insn. */
8986 count += get_attr_length (from);
8988 /* If there is a jump table, add its length. */
8989 tmp = is_jump_table (from);
8990 if (tmp != NULL)
8992 count += get_jump_table_size (tmp);
8994 /* Jump tables aren't in a basic block, so base the cost on
8995 the dispatch insn. If we select this location, we will
8996 still put the pool after the table. */
8997 new_cost = arm_barrier_cost (from);
8999 if (count < max_count
9000 && (!selected || new_cost <= selected_cost))
9002 selected = tmp;
9003 selected_cost = new_cost;
9004 selected_address = fix->address + count;
9007 /* Continue after the dispatch table. */
9008 from = NEXT_INSN (tmp);
9009 continue;
9012 new_cost = arm_barrier_cost (from);
9014 if (count < max_count
9015 && (!selected || new_cost <= selected_cost))
9017 selected = from;
9018 selected_cost = new_cost;
9019 selected_address = fix->address + count;
9022 from = NEXT_INSN (from);
9025 /* Make sure that we found a place to insert the jump. */
9026 gcc_assert (selected);
9028 /* Create a new JUMP_INSN that branches around a barrier. */
9029 from = emit_jump_insn_after (gen_jump (label), selected);
9030 JUMP_LABEL (from) = label;
9031 barrier = emit_barrier_after (from);
9032 emit_label_after (label, barrier);
9034 /* Create a minipool barrier entry for the new barrier. */
9035 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
9036 new_fix->insn = barrier;
9037 new_fix->address = selected_address;
9038 new_fix->next = fix->next;
9039 fix->next = new_fix;
9041 return new_fix;
9044 /* Record that there is a natural barrier in the insn stream at
9045 ADDRESS. */
9046 static void
9047 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
9049 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9051 fix->insn = insn;
9052 fix->address = address;
9054 fix->next = NULL;
9055 if (minipool_fix_head != NULL)
9056 minipool_fix_tail->next = fix;
9057 else
9058 minipool_fix_head = fix;
9060 minipool_fix_tail = fix;
9063 /* Record INSN, which will need fixing up to load a value from the
9064 minipool. ADDRESS is the offset of the insn since the start of the
9065 function; LOC is a pointer to the part of the insn which requires
9066 fixing; VALUE is the constant that must be loaded, which is of type
9067 MODE. */
9068 static void
9069 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
9070 enum machine_mode mode, rtx value)
9072 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
9074 fix->insn = insn;
9075 fix->address = address;
9076 fix->loc = loc;
9077 fix->mode = mode;
9078 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
9079 fix->value = value;
9080 fix->forwards = get_attr_pool_range (insn);
9081 fix->backwards = get_attr_neg_pool_range (insn);
9082 fix->minipool = NULL;
9084 /* If an insn doesn't have a range defined for it, then it isn't
9085 expecting to be reworked by this code. Better to stop now than
9086 to generate duff assembly code. */
9087 gcc_assert (fix->forwards || fix->backwards);
9089 /* If an entry requires 8-byte alignment then assume all constant pools
9090 require 4 bytes of padding. Trying to do this later on a per-pool
9091 basis is awkward because existing pool entries have to be modified. */
9092 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
9093 minipool_pad = 4;
9095 if (dump_file)
9097 fprintf (dump_file,
9098 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
9099 GET_MODE_NAME (mode),
9100 INSN_UID (insn), (unsigned long) address,
9101 -1 * (long)fix->backwards, (long)fix->forwards);
9102 arm_print_value (dump_file, fix->value);
9103 fprintf (dump_file, "\n");
9106 /* Add it to the chain of fixes. */
9107 fix->next = NULL;
9109 if (minipool_fix_head != NULL)
9110 minipool_fix_tail->next = fix;
9111 else
9112 minipool_fix_head = fix;
9114 minipool_fix_tail = fix;
9117 /* Return the cost of synthesizing a 64-bit constant VAL inline.
9118 Returns the number of insns needed, or 99 if we don't know how to
9119 do it. */
9121 arm_const_double_inline_cost (rtx val)
9123 rtx lowpart, highpart;
9124 enum machine_mode mode;
9126 mode = GET_MODE (val);
9128 if (mode == VOIDmode)
9129 mode = DImode;
9131 gcc_assert (GET_MODE_SIZE (mode) == 8);
9133 lowpart = gen_lowpart (SImode, val);
9134 highpart = gen_highpart_mode (SImode, mode, val);
9136 gcc_assert (GET_CODE (lowpart) == CONST_INT);
9137 gcc_assert (GET_CODE (highpart) == CONST_INT);
9139 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
9140 NULL_RTX, NULL_RTX, 0, 0)
9141 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
9142 NULL_RTX, NULL_RTX, 0, 0));
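/* Illustrative example of the cost computed above (not exhaustive): for a
   DImode value such as 0x0000000100000001, both 32-bit halves are valid
   ARM immediates, so the cost would typically be 1 + 1 = 2 insns; halves
   that must be built from several pieces (a MOV/MVN plus ORR/EOR/BIC
   steps) cost correspondingly more per word.  */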
9145 /* Return true if it is worthwhile to split a 64-bit constant into two
9146 32-bit operations. This is the case if optimizing for size, or
9147 if we have load delay slots, or if one 32-bit part can be done with
9148 a single data operation. */
9149 bool
9150 arm_const_double_by_parts (rtx val)
9152 enum machine_mode mode = GET_MODE (val);
9153 rtx part;
9155 if (optimize_size || arm_ld_sched)
9156 return true;
9158 if (mode == VOIDmode)
9159 mode = DImode;
9161 part = gen_highpart_mode (SImode, mode, val);
9163 gcc_assert (GET_CODE (part) == CONST_INT);
9165 if (const_ok_for_arm (INTVAL (part))
9166 || const_ok_for_arm (~INTVAL (part)))
9167 return true;
9169 part = gen_lowpart (SImode, val);
9171 gcc_assert (GET_CODE (part) == CONST_INT);
9173 if (const_ok_for_arm (INTVAL (part))
9174 || const_ok_for_arm (~INTVAL (part)))
9175 return true;
9177 return false;
9180 /* Scan INSN and note any of its operands that need fixing.
9181 If DO_PUSHES is false we do not actually push any of the fixups
9182 needed. The function returns TRUE if any fixups were needed/pushed.
9183 This is used by arm_memory_load_p() which needs to know about loads
9184 of constants that will be converted into minipool loads. */
9185 static bool
9186 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
9188 bool result = false;
9189 int opno;
9191 extract_insn (insn);
9193 if (!constrain_operands (1))
9194 fatal_insn_not_found (insn);
9196 if (recog_data.n_alternatives == 0)
9197 return false;
9199 /* Fill in recog_op_alt with information about the constraints of
9200 this insn. */
9201 preprocess_constraints ();
9203 for (opno = 0; opno < recog_data.n_operands; opno++)
9205 /* Things we need to fix can only occur in inputs. */
9206 if (recog_data.operand_type[opno] != OP_IN)
9207 continue;
9209 /* If this alternative is a memory reference, then any mention
9210 of constants in this alternative is really to fool reload
9211 into allowing us to accept one there. We need to fix them up
9212 now so that we output the right code. */
9213 if (recog_op_alt[opno][which_alternative].memory_ok)
9215 rtx op = recog_data.operand[opno];
9217 if (CONSTANT_P (op))
9219 if (do_pushes)
9220 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
9221 recog_data.operand_mode[opno], op);
9222 result = true;
9224 else if (GET_CODE (op) == MEM
9225 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
9226 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
9228 if (do_pushes)
9230 rtx cop = avoid_constant_pool_reference (op);
9232 /* Casting the address of something to a mode narrower
9233 than a word can cause avoid_constant_pool_reference()
9234 to return the pool reference itself. That's no good to
9235 us here. Let's just hope that we can use the
9236 constant pool value directly. */
9237 if (op == cop)
9238 cop = get_pool_constant (XEXP (op, 0));
9240 push_minipool_fix (insn, address,
9241 recog_data.operand_loc[opno],
9242 recog_data.operand_mode[opno], cop);
9245 result = true;
9250 return result;
9253 /* GCC puts the pool in the wrong place for ARM, since we can only
9254 load addresses a limited distance around the pc. We do some
9255 special munging to move the constant pool values to the correct
9256 point in the code. */
9257 static void
9258 arm_reorg (void)
9260 rtx insn;
9261 HOST_WIDE_INT address = 0;
9262 Mfix * fix;
9264 minipool_fix_head = minipool_fix_tail = NULL;
9266 /* The first insn must always be a note, or the code below won't
9267 scan it properly. */
9268 insn = get_insns ();
9269 gcc_assert (GET_CODE (insn) == NOTE);
9270 minipool_pad = 0;
9272 /* Scan all the insns and record the operands that will need fixing. */
9273 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
9275 if (TARGET_CIRRUS_FIX_INVALID_INSNS
9276 && (arm_cirrus_insn_p (insn)
9277 || GET_CODE (insn) == JUMP_INSN
9278 || arm_memory_load_p (insn)))
9279 cirrus_reorg (insn);
9281 if (GET_CODE (insn) == BARRIER)
9282 push_minipool_barrier (insn, address);
9283 else if (INSN_P (insn))
9285 rtx table;
9287 note_invalid_constants (insn, address, true);
9288 address += get_attr_length (insn);
9290 /* If the insn is a vector jump, add the size of the table
9291 and skip the table. */
9292 if ((table = is_jump_table (insn)) != NULL)
9294 address += get_jump_table_size (table);
9295 insn = table;
9300 fix = minipool_fix_head;
9302 /* Now scan the fixups and perform the required changes. */
9303 while (fix)
9305 Mfix * ftmp;
9306 Mfix * fdel;
9307 Mfix * last_added_fix;
9308 Mfix * last_barrier = NULL;
9309 Mfix * this_fix;
9311 /* Skip any further barriers before the next fix. */
9312 while (fix && GET_CODE (fix->insn) == BARRIER)
9313 fix = fix->next;
9315 /* No more fixes. */
9316 if (fix == NULL)
9317 break;
9319 last_added_fix = NULL;
9321 for (ftmp = fix; ftmp; ftmp = ftmp->next)
9323 if (GET_CODE (ftmp->insn) == BARRIER)
9325 if (ftmp->address >= minipool_vector_head->max_address)
9326 break;
9328 last_barrier = ftmp;
9330 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
9331 break;
9333 last_added_fix = ftmp; /* Keep track of the last fix added. */
9336 /* If we found a barrier, drop back to that; any fixes that we
9337 could have reached but come after the barrier will now go in
9338 the next mini-pool. */
9339 if (last_barrier != NULL)
9341 /* Reduce the refcount for those fixes that won't go into this
9342 pool after all. */
9343 for (fdel = last_barrier->next;
9344 fdel && fdel != ftmp;
9345 fdel = fdel->next)
9347 fdel->minipool->refcount--;
9348 fdel->minipool = NULL;
9351 ftmp = last_barrier;
9353 else
9355 /* ftmp is the first fix that we can't fit into this pool and
9356 there are no natural barriers that we could use. Insert a
9357 new barrier in the code somewhere between the previous
9358 fix and this one, and arrange to jump around it. */
9359 HOST_WIDE_INT max_address;
9361 /* The last item on the list of fixes must be a barrier, so
9362 we can never run off the end of the list of fixes without
9363 last_barrier being set. */
9364 gcc_assert (ftmp);
9366 max_address = minipool_vector_head->max_address;
9367 /* Check that there isn't another fix that is in range that
9368 we couldn't fit into this pool because the pool was
9369 already too large: we need to put the pool before such an
9370 instruction. The pool itself may come just after the
9371 fix because create_fix_barrier also allows space for a
9372 jump instruction. */
9373 if (ftmp->address < max_address)
9374 max_address = ftmp->address + 1;
9376 last_barrier = create_fix_barrier (last_added_fix, max_address);
9379 assign_minipool_offsets (last_barrier);
9381 while (ftmp)
9383 if (GET_CODE (ftmp->insn) != BARRIER
9384 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
9385 == NULL))
9386 break;
9388 ftmp = ftmp->next;
9391 /* Scan over the fixes we have identified for this pool, fixing them
9392 up and adding the constants to the pool itself. */
9393 for (this_fix = fix; this_fix && ftmp != this_fix;
9394 this_fix = this_fix->next)
9395 if (GET_CODE (this_fix->insn) != BARRIER)
9397 rtx addr
9398 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
9399 minipool_vector_label),
9400 this_fix->minipool->offset);
9401 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
9404 dump_minipool (last_barrier->insn);
9405 fix = ftmp;
9408 /* From now on we must synthesize any constants that we can't handle
9409 directly. This can happen if the RTL gets split during final
9410 instruction generation. */
9411 after_arm_reorg = 1;
9413 /* Free the minipool memory. */
9414 obstack_free (&minipool_obstack, minipool_startobj);
9417 /* Routines to output assembly language. */
9419 /* If the rtx is the correct value then return the string of the number.
9420 In this way we can ensure that valid double constants are generated even
9421 when cross compiling. */
9422 const char *
9423 fp_immediate_constant (rtx x)
9425 REAL_VALUE_TYPE r;
9426 int i;
9428 if (!fp_consts_inited)
9429 init_fp_table ();
9431 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9432 for (i = 0; i < 8; i++)
9433 if (REAL_VALUES_EQUAL (r, values_fp[i]))
9434 return strings_fp[i];
9436 gcc_unreachable ();
9439 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
9440 static const char *
9441 fp_const_from_val (REAL_VALUE_TYPE *r)
9443 int i;
9445 if (!fp_consts_inited)
9446 init_fp_table ();
9448 for (i = 0; i < 8; i++)
9449 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
9450 return strings_fp[i];
9452 gcc_unreachable ();
9455 /* Output the operands of a LDM/STM instruction to STREAM.
9456 MASK is the ARM register set mask of which only bits 0-15 are important.
9457 REG is the base register, either the frame pointer or the stack pointer.
9458 INSTR is the possibly suffixed load or store instruction.
9459 RFE is nonzero if the instruction should also copy spsr to cpsr. */
9461 static void
9462 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
9463 unsigned long mask, int rfe)
9465 unsigned i;
9466 bool not_first = FALSE;
9468 gcc_assert (!rfe || (mask & (1 << PC_REGNUM)));
9469 fputc ('\t', stream);
9470 asm_fprintf (stream, instr, reg);
9471 fputc ('{', stream);
9473 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9474 if (mask & (1 << i))
9476 if (not_first)
9477 fprintf (stream, ", ");
9479 asm_fprintf (stream, "%r", i);
9480 not_first = TRUE;
9483 if (rfe)
9484 fprintf (stream, "}^\n");
9485 else
9486 fprintf (stream, "}\n");
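/* For example (illustrative): a MASK with bits 4, 5 and 14 set, a
   pop-style INSTR and REG = sp would print something along the lines of
   "ldmfd	sp!, {r4, r5, lr}", with "}^" substituted for "}" when RFE
   is set so that SPSR is copied back to CPSR on the return.  */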
9490 /* Output a FLDMD instruction to STREAM.
9491 BASE is the register containing the address.
9492 REG and COUNT specify the register range.
9493 Extra registers may be added to avoid hardware bugs.
9495 We output FLDMD even for ARMv5 VFP implementations. Although
9496 FLDMD is technically not supported until ARMv6, it is believed
9497 that all VFP implementations support its use in this context. */
9499 static void
9500 vfp_output_fldmd (FILE * stream, unsigned int base, int reg, int count)
9502 int i;
9504 /* Workaround ARM10 VFPr1 bug. */
9505 if (count == 2 && !arm_arch6)
9507 if (reg == 15)
9508 reg--;
9509 count++;
9512 /* FLDMD may not load more than 16 doubleword registers at a time. Split the
9513 load into multiple parts if we have to handle more than 16 registers. */
9514 if (count > 16)
9516 vfp_output_fldmd (stream, base, reg, 16);
9517 vfp_output_fldmd (stream, base, reg + 16, count - 16);
9518 return;
9521 fputc ('\t', stream);
9522 asm_fprintf (stream, "fldmfdd\t%r!, {", base);
9524 for (i = reg; i < reg + count; i++)
9526 if (i > reg)
9527 fputs (", ", stream);
9528 asm_fprintf (stream, "d%d", i);
9530 fputs ("}\n", stream);
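/* Illustrative example: BASE = sp, REG = 8, COUNT = 2 would print roughly
   "fldmfdd	sp!, {d8, d9}"; on a pre-ARMv6 VFP the ARM10 VFPr1
   workaround above widens this to three registers ({d8, d9, d10}) so that
   exactly two register pairs are never loaded by a single insn.  */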
9535 /* Output the assembly for a store multiple. */
9537 const char *
9538 vfp_output_fstmd (rtx * operands)
9540 char pattern[100];
9541 int p;
9542 int base;
9543 int i;
9545 strcpy (pattern, "fstmfdd\t%m0!, {%P1");
9546 p = strlen (pattern);
9548 gcc_assert (GET_CODE (operands[1]) == REG);
9550 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
9551 for (i = 1; i < XVECLEN (operands[2], 0); i++)
9553 p += sprintf (&pattern[p], ", d%d", base + i);
9555 strcpy (&pattern[p], "}");
9557 output_asm_insn (pattern, operands);
9558 return "";
9562 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
9563 number of bytes pushed. */
9565 static int
9566 vfp_emit_fstmd (int base_reg, int count)
9568 rtx par;
9569 rtx dwarf;
9570 rtx tmp, reg;
9571 int i;
9573 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
9574 register pairs are stored by a store multiple insn. We avoid this
9575 by pushing an extra pair. */
9576 if (count == 2 && !arm_arch6)
9578 if (base_reg == LAST_VFP_REGNUM - 3)
9579 base_reg -= 2;
9580 count++;
9583 /* FSTMD may not store more than 16 doubleword registers at once. Split
9584 larger stores into multiple parts (up to a maximum of two, in
9585 practice). */
9586 if (count > 16)
9588 int saved;
9589 /* NOTE: base_reg is an internal register number, so each D register
9590 counts as 2. */
9591 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
9592 saved += vfp_emit_fstmd (base_reg, 16);
9593 return saved;
9596 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9597 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9599 reg = gen_rtx_REG (DFmode, base_reg);
9600 base_reg += 2;
9602 XVECEXP (par, 0, 0)
9603 = gen_rtx_SET (VOIDmode,
9604 gen_frame_mem (BLKmode,
9605 gen_rtx_PRE_DEC (BLKmode,
9606 stack_pointer_rtx)),
9607 gen_rtx_UNSPEC (BLKmode,
9608 gen_rtvec (1, reg),
9609 UNSPEC_PUSH_MULT));
9611 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9612 plus_constant (stack_pointer_rtx, -(count * 8)));
9613 RTX_FRAME_RELATED_P (tmp) = 1;
9614 XVECEXP (dwarf, 0, 0) = tmp;
9616 tmp = gen_rtx_SET (VOIDmode,
9617 gen_frame_mem (DFmode, stack_pointer_rtx),
9618 reg);
9619 RTX_FRAME_RELATED_P (tmp) = 1;
9620 XVECEXP (dwarf, 0, 1) = tmp;
9622 for (i = 1; i < count; i++)
9624 reg = gen_rtx_REG (DFmode, base_reg);
9625 base_reg += 2;
9626 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9628 tmp = gen_rtx_SET (VOIDmode,
9629 gen_frame_mem (DFmode,
9630 plus_constant (stack_pointer_rtx,
9631 i * 8)),
9632 reg);
9633 RTX_FRAME_RELATED_P (tmp) = 1;
9634 XVECEXP (dwarf, 0, i + 1) = tmp;
9637 par = emit_insn (par);
9638 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9639 REG_NOTES (par));
9640 RTX_FRAME_RELATED_P (par) = 1;
9642 return count * 8;
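/* Illustrative example: a call with COUNT = 3 stores three consecutive D
   registers with a single fstmfdd (built as a PARALLEL wrapping an
   UNSPEC_PUSH_MULT), attaches the frame-related notes constructed above,
   and returns 24, the number of bytes by which the stack pointer drops.  */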
9645 /* Emit a call instruction with pattern PAT. ADDR is the address of
9646 the call target. */
9648 void
9649 arm_emit_call_insn (rtx pat, rtx addr)
9651 rtx insn;
9653 insn = emit_call_insn (pat);
9655 /* The PIC register is live on entry to VxWorks PIC PLT entries.
9656 If the call might use such an entry, add a use of the PIC register
9657 to the instruction's CALL_INSN_FUNCTION_USAGE. */
9658 if (TARGET_VXWORKS_RTP
9659 && flag_pic
9660 && GET_CODE (addr) == SYMBOL_REF
9661 && (SYMBOL_REF_DECL (addr)
9662 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
9663 : !SYMBOL_REF_LOCAL_P (addr)))
9665 require_pic_register ();
9666 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
9670 /* Output a 'call' insn. */
9671 const char *
9672 output_call (rtx *operands)
9674 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
9676 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
9677 if (REGNO (operands[0]) == LR_REGNUM)
9679 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
9680 output_asm_insn ("mov%?\t%0, %|lr", operands);
9683 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9685 if (TARGET_INTERWORK || arm_arch4t)
9686 output_asm_insn ("bx%?\t%0", operands);
9687 else
9688 output_asm_insn ("mov%?\t%|pc, %0", operands);
9690 return "";
9693 /* Output a 'call' insn that is a reference in memory. */
9694 const char *
9695 output_call_mem (rtx *operands)
9697 if (TARGET_INTERWORK && !arm_arch5)
9699 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9700 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9701 output_asm_insn ("bx%?\t%|ip", operands);
9703 else if (regno_use_in (LR_REGNUM, operands[0]))
9705 /* LR is used in the memory address. We load the address in the
9706 first instruction. It's safe to use IP as the target of the
9707 load since the call will kill it anyway. */
9708 output_asm_insn ("ldr%?\t%|ip, %0", operands);
9709 if (arm_arch5)
9710 output_asm_insn ("blx%?\t%|ip", operands);
9711 else
9713 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9714 if (arm_arch4t)
9715 output_asm_insn ("bx%?\t%|ip", operands);
9716 else
9717 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
9720 else
9722 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
9723 output_asm_insn ("ldr%?\t%|pc, %0", operands);
9726 return "";
9730 /* Output a move from arm registers to an fpa register.
9731 OPERANDS[0] is an fpa register.
9732 OPERANDS[1] is the first register of an arm register pair. */
9733 const char *
9734 output_mov_long_double_fpa_from_arm (rtx *operands)
9736 int arm_reg0 = REGNO (operands[1]);
9737 rtx ops[3];
9739 gcc_assert (arm_reg0 != IP_REGNUM);
9741 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9742 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9743 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9745 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9746 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
9748 return "";
9751 /* Output a move from an fpa register to arm registers.
9752 OPERANDS[0] is the first register of an arm register pair.
9753 OPERANDS[1] is an fpa register. */
9754 const char *
9755 output_mov_long_double_arm_from_fpa (rtx *operands)
9757 int arm_reg0 = REGNO (operands[0]);
9758 rtx ops[3];
9760 gcc_assert (arm_reg0 != IP_REGNUM);
9762 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9763 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9764 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
9766 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
9767 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1, %2}", ops);
9768 return "";
9771 /* Output a move from arm registers to arm registers of a long double.
9772 OPERANDS[0] is the destination.
9773 OPERANDS[1] is the source. */
9774 const char *
9775 output_mov_long_double_arm_from_arm (rtx *operands)
9777 /* We have to be careful here because the two might overlap. */
9778 int dest_start = REGNO (operands[0]);
9779 int src_start = REGNO (operands[1]);
9780 rtx ops[2];
9781 int i;
9783 if (dest_start < src_start)
9785 for (i = 0; i < 3; i++)
9787 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9788 ops[1] = gen_rtx_REG (SImode, src_start + i);
9789 output_asm_insn ("mov%?\t%0, %1", ops);
9792 else
9794 for (i = 2; i >= 0; i--)
9796 ops[0] = gen_rtx_REG (SImode, dest_start + i);
9797 ops[1] = gen_rtx_REG (SImode, src_start + i);
9798 output_asm_insn ("mov%?\t%0, %1", ops);
9802 return "";
9806 /* Output a move from arm registers to an fpa register.
9807 OPERANDS[0] is an fpa register.
9808 OPERANDS[1] is the first register of an arm register pair. */
9809 const char *
9810 output_mov_double_fpa_from_arm (rtx *operands)
9812 int arm_reg0 = REGNO (operands[1]);
9813 rtx ops[2];
9815 gcc_assert (arm_reg0 != IP_REGNUM);
9817 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9818 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9819 output_asm_insn ("stm%(fd%)\t%|sp!, {%0, %1}", ops);
9820 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
9821 return "";
9824 /* Output a move from an fpa register to arm registers.
9825 OPERANDS[0] is the first register of an arm register pair.
9826 OPERANDS[1] is an fpa register. */
9827 const char *
9828 output_mov_double_arm_from_fpa (rtx *operands)
9830 int arm_reg0 = REGNO (operands[0]);
9831 rtx ops[2];
9833 gcc_assert (arm_reg0 != IP_REGNUM);
9835 ops[0] = gen_rtx_REG (SImode, arm_reg0);
9836 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
9837 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
9838 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
9839 return "";
9842 /* Output a move between double words.
9843 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
9844 or MEM<-REG and all MEMs must be offsettable addresses. */
9845 const char *
9846 output_move_double (rtx *operands)
9848 enum rtx_code code0 = GET_CODE (operands[0]);
9849 enum rtx_code code1 = GET_CODE (operands[1]);
9850 rtx otherops[3];
9852 if (code0 == REG)
9854 int reg0 = REGNO (operands[0]);
9856 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9858 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
9860 switch (GET_CODE (XEXP (operands[1], 0)))
9862 case REG:
9863 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
9864 break;
9866 case PRE_INC:
9867 gcc_assert (TARGET_LDRD);
9868 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
9869 break;
9871 case PRE_DEC:
9872 if (TARGET_LDRD)
9873 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
9874 else
9875 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
9876 break;
9878 case POST_INC:
9879 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
9880 break;
9882 case POST_DEC:
9883 gcc_assert (TARGET_LDRD);
9884 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
9885 break;
9887 case PRE_MODIFY:
9888 case POST_MODIFY:
9889 otherops[0] = operands[0];
9890 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
9891 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
9893 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
9895 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9897 /* Registers overlap so split out the increment. */
9898 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9899 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
9901 else
9903 /* IWMMXT allows offsets larger than ldrd can handle,
9904 so fix these up with a pair of ldr. */
9905 if (GET_CODE (otherops[2]) == CONST_INT
9906 && (INTVAL(otherops[2]) <= -256
9907 || INTVAL(otherops[2]) >= 256))
9909 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
9910 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9911 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9913 else
9914 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
9917 else
9919 /* IWMMXT allows offsets larger than ldrd can handle,
9920 so fix these up with a pair of ldr. */
9921 if (GET_CODE (otherops[2]) == CONST_INT
9922 && (INTVAL(otherops[2]) <= -256
9923 || INTVAL(otherops[2]) >= 256))
9925 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
9926 output_asm_insn ("ldr%?\t%0, [%1, #4]", otherops);
9927 otherops[0] = operands[0];
9928 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
9930 else
9931 /* We only allow constant increments, so this is safe. */
9932 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
9934 break;
9936 case LABEL_REF:
9937 case CONST:
9938 output_asm_insn ("adr%?\t%0, %1", operands);
9939 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
9940 break;
9942 /* ??? This needs checking for thumb2. */
9943 default:
9944 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
9945 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
9947 otherops[0] = operands[0];
9948 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
9949 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
9951 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
9953 if (GET_CODE (otherops[2]) == CONST_INT)
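/* A double word at the base register minus 8, minus 4 or plus 4 can
   be loaded with a single LDMDB, LDMDA or LDMIB of the base register;
   Thumb-2 only has the IA and DB forms, hence the breaks below.  */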
9955 switch ((int) INTVAL (otherops[2]))
9957 case -8:
9958 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
9959 return "";
9960 case -4:
9961 if (TARGET_THUMB2)
9962 break;
9963 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
9964 return "";
9965 case 4:
9966 if (TARGET_THUMB2)
9967 break;
9968 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
9969 return "";
9972 if (TARGET_LDRD
9973 && (GET_CODE (otherops[2]) == REG
9974 || (GET_CODE (otherops[2]) == CONST_INT
9975 && INTVAL (otherops[2]) > -256
9976 && INTVAL (otherops[2]) < 256)))
9978 if (reg_overlap_mentioned_p (otherops[0],
9979 otherops[2]))
9981 /* Swap base and index registers over to
9982 avoid a conflict. */
9983 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
9984 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
9986 /* If both registers conflict, it will usually
9987 have been fixed by a splitter. */
9988 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
9990 output_asm_insn ("add%?\t%1, %1, %2", otherops);
9991 output_asm_insn ("ldr%(d%)\t%0, [%1]",
9992 otherops);
9994 else
9995 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
9996 return "";
9999 if (GET_CODE (otherops[2]) == CONST_INT)
10001 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
10002 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
10003 else
10004 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10006 else
10007 output_asm_insn ("add%?\t%0, %1, %2", otherops);
10009 else
10010 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
10012 return "ldm%(ia%)\t%0, %M0";
10014 else
10016 otherops[1] = adjust_address (operands[1], SImode, 4);
10017 /* Take care of overlapping base/data reg. */
10018 if (reg_mentioned_p (operands[0], operands[1]))
10020 output_asm_insn ("ldr%?\t%0, %1", otherops);
10021 output_asm_insn ("ldr%?\t%0, %1", operands);
10023 else
10025 output_asm_insn ("ldr%?\t%0, %1", operands);
10026 output_asm_insn ("ldr%?\t%0, %1", otherops);
10031 else
10033 /* Constraints should ensure this. */
10034 gcc_assert (code0 == MEM && code1 == REG);
10035 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
10037 switch (GET_CODE (XEXP (operands[0], 0)))
10039 case REG:
10040 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10041 break;
10043 case PRE_INC:
10044 gcc_assert (TARGET_LDRD);
10045 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
10046 break;
10048 case PRE_DEC:
10049 if (TARGET_LDRD)
10050 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
10051 else
10052 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
10053 break;
10055 case POST_INC:
10056 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
10057 break;
10059 case POST_DEC:
10060 gcc_assert (TARGET_LDRD);
10061 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
10062 break;
10064 case PRE_MODIFY:
10065 case POST_MODIFY:
10066 otherops[0] = operands[1];
10067 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
10068 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
10070 /* IWMMXT allows offsets larger than strd can handle,
10071 fix these up with a pair of str. */
10072 if (GET_CODE (otherops[2]) == CONST_INT
10073 && (INTVAL(otherops[2]) <= -256
10074 || INTVAL(otherops[2]) >= 256))
10076 rtx reg1;
10077 reg1 = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10078 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10080 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
10081 otherops[0] = reg1;
10082 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10084 else
10086 otherops[0] = reg1;
10087 output_asm_insn ("str%?\t%0, [%1, #4]", otherops);
10088 otherops[0] = operands[1];
10089 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
10092 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
10093 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
10094 else
10095 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
10096 break;
10098 case PLUS:
10099 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
10100 if (GET_CODE (otherops[2]) == CONST_INT)
10102 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
10104 case -8:
10105 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
10106 return "";
10108 case -4:
10109 if (TARGET_THUMB2)
10110 break;
10111 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
10112 return "";
10114 case 4:
10115 if (TARGET_THUMB2)
10116 break;
10117 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
10118 return "";
10121 if (TARGET_LDRD
10122 && (GET_CODE (otherops[2]) == REG
10123 || (GET_CODE (otherops[2]) == CONST_INT
10124 && INTVAL (otherops[2]) > -256
10125 && INTVAL (otherops[2]) < 256)))
10127 otherops[0] = operands[1];
10128 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
10129 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
10130 return "";
10132 /* Fall through */
10134 default:
10135 otherops[0] = adjust_address (operands[0], SImode, 4);
10136 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
10137 output_asm_insn ("str%?\t%1, %0", operands);
10138 output_asm_insn ("str%?\t%1, %0", otherops);
10142 return "";
10145 /* Output a move, load or store for quad-word vectors in ARM registers. Only
10146 handles MEMs accepted by neon_vector_mem_operand with CORE=true. */
10148 const char *
10149 output_move_quad (rtx *operands)
10151 if (REG_P (operands[0]))
10153 /* Load, or reg->reg move. */
10155 if (MEM_P (operands[1]))
10157 switch (GET_CODE (XEXP (operands[1], 0)))
10159 case REG:
10160 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
10161 break;
10163 case LABEL_REF:
10164 case CONST:
10165 output_asm_insn ("adr%?\t%0, %1", operands);
10166 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
10167 break;
10169 default:
10170 gcc_unreachable ();
10173 else
10175 rtx ops[2];
10176 int dest, src, i;
10178 gcc_assert (REG_P (operands[1]));
10180 dest = REGNO (operands[0]);
10181 src = REGNO (operands[1]);
10183 /* This seems pretty dumb, but hopefully GCC won't try to do it
10184 very often. */
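/* Copy in an order that never overwrites a source register before it
   has been read: ascending when the destination is below the source,
   descending otherwise.  */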
10185 if (dest < src)
10186 for (i = 0; i < 4; i++)
10188 ops[0] = gen_rtx_REG (SImode, dest + i);
10189 ops[1] = gen_rtx_REG (SImode, src + i);
10190 output_asm_insn ("mov%?\t%0, %1", ops);
10192 else
10193 for (i = 3; i >= 0; i--)
10195 ops[0] = gen_rtx_REG (SImode, dest + i);
10196 ops[1] = gen_rtx_REG (SImode, src + i);
10197 output_asm_insn ("mov%?\t%0, %1", ops);
10201 else
10203 gcc_assert (MEM_P (operands[0]));
10204 gcc_assert (REG_P (operands[1]));
10205 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
10207 switch (GET_CODE (XEXP (operands[0], 0)))
10209 case REG:
10210 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
10211 break;
10213 default:
10214 gcc_unreachable ();
10218 return "";
10221 /* Output a VFP load or store instruction. */
10223 const char *
10224 output_move_vfp (rtx *operands)
10226 rtx reg, mem, addr, ops[2];
10227 int load = REG_P (operands[0]);
10228 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
10229 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
10230 const char *template;
10231 char buff[50];
10232 enum machine_mode mode;
10234 reg = operands[!load];
10235 mem = operands[load];
10237 mode = GET_MODE (reg);
10239 gcc_assert (REG_P (reg));
10240 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
10241 gcc_assert (mode == SFmode
10242 || mode == DFmode
10243 || mode == SImode
10244 || mode == DImode
10245 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
10246 gcc_assert (MEM_P (mem));
10248 addr = XEXP (mem, 0);
10250 switch (GET_CODE (addr))
10252 case PRE_DEC:
10253 template = "f%smdb%c%%?\t%%0!, {%%%s1}%s";
10254 ops[0] = XEXP (addr, 0);
10255 ops[1] = reg;
10256 break;
10258 case POST_INC:
10259 template = "f%smia%c%%?\t%%0!, {%%%s1}%s";
10260 ops[0] = XEXP (addr, 0);
10261 ops[1] = reg;
10262 break;
10264 default:
10265 template = "f%s%c%%?\t%%%s0, %%1%s";
10266 ops[0] = reg;
10267 ops[1] = mem;
10268 break;
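/* For example, the default template expands to "fldd%?\t%P0, %1" for a
   double-precision load and to "fsts%?\t%0, %1" for a single-precision
   store.  */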
10271 sprintf (buff, template,
10272 load ? "ld" : "st",
10273 dp ? 'd' : 's',
10274 dp ? "P" : "",
10275 integer_p ? "\t%@ int" : "");
10276 output_asm_insn (buff, ops);
10278 return "";
10281 /* Output a Neon quad-word load or store, or a load or store for
10282 larger structure modes. We could also support post-modify forms using
10283 VLD1/VST1 (for the vectorizer, and perhaps otherwise), but we don't do that
10284 yet.
10285 WARNING: The ordering of elements in memory is weird in big-endian mode,
10286 because we use VSTM instead of VST1, to make it easy to make vector stores
10287 via ARM registers write values in the same order as stores direct from Neon
10288 registers. For example, the byte ordering of a quadword vector with 16-bit
10289 elements like this:
10291 [e7:e6:e5:e4:e3:e2:e1:e0] (highest-numbered element first)
10293 will be (with lowest address first, h = most-significant byte,
10294 l = least-significant byte of element):
10296 [e3h, e3l, e2h, e2l, e1h, e1l, e0h, e0l,
10297 e7h, e7l, e6h, e6l, e5h, e5l, e4h, e4l]
10299 When necessary, quadword registers (dN, dN+1) are moved to ARM registers from
10300 rN in the order:
10302 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
10304 So that STM/LDM can be used on vectors in ARM registers, and the same memory
10305 layout will result as if VSTM/VLDM were used. */
10307 const char *
10308 output_move_neon (rtx *operands)
10310 rtx reg, mem, addr, ops[2];
10311 int regno, load = REG_P (operands[0]);
10312 const char *template;
10313 char buff[50];
10314 enum machine_mode mode;
10316 reg = operands[!load];
10317 mem = operands[load];
10319 mode = GET_MODE (reg);
10321 gcc_assert (REG_P (reg));
10322 regno = REGNO (reg);
10323 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
10324 || NEON_REGNO_OK_FOR_QUAD (regno));
10325 gcc_assert (VALID_NEON_DREG_MODE (mode)
10326 || VALID_NEON_QREG_MODE (mode)
10327 || VALID_NEON_STRUCT_MODE (mode));
10328 gcc_assert (MEM_P (mem));
10330 addr = XEXP (mem, 0);
10332 /* Strip off const from addresses like (const (plus (...))). */
10333 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
10334 addr = XEXP (addr, 0);
10336 switch (GET_CODE (addr))
10338 case POST_INC:
10339 template = "v%smia%%?\t%%0!, %%h1";
10340 ops[0] = XEXP (addr, 0);
10341 ops[1] = reg;
10342 break;
10344 case POST_MODIFY:
10345 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
10346 gcc_unreachable ();
10348 case LABEL_REF:
10349 case PLUS:
10351 int nregs = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
10352 int i;
10353 int overlap = -1;
10354 for (i = 0; i < nregs; i++)
10356 /* We're only using DImode here because it's a convenient size. */
10357 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
10358 ops[1] = adjust_address (mem, SImode, 8 * i);
10359 if (reg_overlap_mentioned_p (ops[0], mem))
10361 gcc_assert (overlap == -1);
10362 overlap = i;
10364 else
10366 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10367 output_asm_insn (buff, ops);
10370 if (overlap != -1)
10372 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
10373 ops[1] = adjust_address (mem, SImode, 8 * overlap);
10374 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
10375 output_asm_insn (buff, ops);
10378 return "";
10381 default:
10382 template = "v%smia%%?\t%%m0, %%h1";
10383 ops[0] = mem;
10384 ops[1] = reg;
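/* The default template expands to "vldmia%?\t%m0, %h1" for a load and
   to "vstmia%?\t%m0, %h1" for a store.  */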
10387 sprintf (buff, template, load ? "ld" : "st");
10388 output_asm_insn (buff, ops);
10390 return "";
10393 /* Output an ADD r, s, #n where n may be too big for one instruction.
10394 If adding zero to one register, output nothing. */
10395 const char *
10396 output_add_immediate (rtx *operands)
10398 HOST_WIDE_INT n = INTVAL (operands[2]);
10400 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
10402 if (n < 0)
10403 output_multi_immediate (operands,
10404 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
10405 -n);
10406 else
10407 output_multi_immediate (operands,
10408 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
10412 return "";
10415 /* Output a multiple immediate operation.
10416 OPERANDS is the vector of operands referred to in the output patterns.
10417 INSTR1 is the output pattern to use for the first constant.
10418 INSTR2 is the output pattern to use for subsequent constants.
10419 IMMED_OP is the index of the constant slot in OPERANDS.
10420 N is the constant value. */
10421 static const char *
10422 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
10423 int immed_op, HOST_WIDE_INT n)
10425 #if HOST_BITS_PER_WIDE_INT > 32
10426 n &= 0xffffffff;
10427 #endif
10429 if (n == 0)
10431 /* Quick and easy output. */
10432 operands[immed_op] = const0_rtx;
10433 output_asm_insn (instr1, operands);
10435 else
10437 int i;
10438 const char * instr = instr1;
10440 /* Note that n is never zero here (which would give no output). */
10441 for (i = 0; i < 32; i += 2)
10443 if (n & (3 << i))
10445 operands[immed_op] = GEN_INT (n & (255 << i));
10446 output_asm_insn (instr, operands);
10447 instr = instr2;
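/* Together with the loop's own "i += 2" this advances I by eight bits,
   past the byte just emitted, so successive constants cover disjoint
   8-bit fields.  */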
10448 i += 6;
10453 return "";
10456 /* Return the name of a shifter operation. */
10457 static const char *
10458 arm_shift_nmem(enum rtx_code code)
10460 switch (code)
10462 case ASHIFT:
10463 return ARM_LSL_NAME;
10465 case ASHIFTRT:
10466 return "asr";
10468 case LSHIFTRT:
10469 return "lsr";
10471 case ROTATERT:
10472 return "ror";
10474 default:
10475 abort();
10479 /* Return the appropriate ARM instruction for the operation code.
10480 The returned result should not be overwritten. OP is the rtx of the
10481 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
10482 was shifted. */
10483 const char *
10484 arithmetic_instr (rtx op, int shift_first_arg)
10486 switch (GET_CODE (op))
10488 case PLUS:
10489 return "add";
10491 case MINUS:
10492 return shift_first_arg ? "rsb" : "sub";
10494 case IOR:
10495 return "orr";
10497 case XOR:
10498 return "eor";
10500 case AND:
10501 return "and";
10503 case ASHIFT:
10504 case ASHIFTRT:
10505 case LSHIFTRT:
10506 case ROTATERT:
10507 return arm_shift_nmem(GET_CODE(op));
10509 default:
10510 gcc_unreachable ();
10514 /* Ensure valid constant shifts and return the appropriate shift mnemonic
10515 for the operation code. The returned result should not be overwritten.
10516 OP is the rtx code of the shift.
10517 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
10518 holds the constant amount of the shift. */
10519 static const char *
10520 shift_op (rtx op, HOST_WIDE_INT *amountp)
10522 const char * mnem;
10523 enum rtx_code code = GET_CODE (op);
10525 switch (GET_CODE (XEXP (op, 1)))
10527 case REG:
10528 case SUBREG:
10529 *amountp = -1;
10530 break;
10532 case CONST_INT:
10533 *amountp = INTVAL (XEXP (op, 1));
10534 break;
10536 default:
10537 gcc_unreachable ();
10540 switch (code)
10542 case ROTATE:
10543 gcc_assert (*amountp != -1);
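/* A left rotate by N is equivalent to a right rotate by 32 - N.  */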
10544 *amountp = 32 - *amountp;
10545 code = ROTATERT;
10547 /* Fall through. */
10549 case ASHIFT:
10550 case ASHIFTRT:
10551 case LSHIFTRT:
10552 case ROTATERT:
10553 mnem = arm_shift_nmem(code);
10554 break;
10556 case MULT:
10557 /* We never have to worry about the amount being other than a
10558 power of 2, since this case can never be reloaded from a reg. */
10559 gcc_assert (*amountp != -1);
10560 *amountp = int_log2 (*amountp);
10561 return ARM_LSL_NAME;
10563 default:
10564 gcc_unreachable ();
10567 if (*amountp != -1)
10569 /* This is not 100% correct, but follows from the desire to merge
10570 multiplication by a power of 2 with the recognizer for a
10571 shift. >=32 is not a valid shift for "lsl", so we must try and
10572 output a shift that produces the correct arithmetical result.
10573 Using lsr #32 is identical except for the fact that the carry bit
10574 is not set correctly if we set the flags; but we never use the
10575 carry bit from such an operation, so we can ignore that. */
10576 if (code == ROTATERT)
10577 /* Rotate is just modulo 32. */
10578 *amountp &= 31;
10579 else if (*amountp != (*amountp & 31))
10581 if (code == ASHIFT)
10582 mnem = "lsr";
10583 *amountp = 32;
10586 /* Shifts of 0 are no-ops. */
10587 if (*amountp == 0)
10588 return NULL;
10591 return mnem;
10594 /* Obtain the shift from the POWER of two. */
10596 static HOST_WIDE_INT
10597 int_log2 (HOST_WIDE_INT power)
10599 HOST_WIDE_INT shift = 0;
10601 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
10603 gcc_assert (shift <= 31);
10604 shift++;
10607 return shift;
10610 /* Output a .ascii pseudo-op, keeping track of lengths. This is
10611 because /bin/as is horribly restrictive. The judgement about
10612 whether or not each character is 'printable' (and can be output as
10613 is) or not (and must be printed with an octal escape) must be made
10614 with reference to the *host* character set -- the situation is
10615 similar to that discussed in the comments above pp_c_char in
10616 c-pretty-print.c. */
10618 #define MAX_ASCII_LEN 51
10620 void
10621 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
10623 int i;
10624 int len_so_far = 0;
10626 fputs ("\t.ascii\t\"", stream);
10628 for (i = 0; i < len; i++)
10630 int c = p[i];
10632 if (len_so_far >= MAX_ASCII_LEN)
10634 fputs ("\"\n\t.ascii\t\"", stream);
10635 len_so_far = 0;
10638 if (ISPRINT (c))
10640 if (c == '\\' || c == '\"')
10642 putc ('\\', stream);
10643 len_so_far++;
10645 putc (c, stream);
10646 len_so_far++;
10648 else
10650 fprintf (stream, "\\%03o", c);
10651 len_so_far += 4;
10655 fputs ("\"\n", stream);
10658 /* Compute the register save mask for registers 0 through 12
10659 inclusive. This code is used by arm_compute_save_reg_mask. */
10661 static unsigned long
10662 arm_compute_save_reg0_reg12_mask (void)
10664 unsigned long func_type = arm_current_func_type ();
10665 unsigned long save_reg_mask = 0;
10666 unsigned int reg;
10668 if (IS_INTERRUPT (func_type))
10670 unsigned int max_reg;
10671 /* Interrupt functions must not corrupt any registers,
10672 even call clobbered ones. If this is a leaf function
10673 we can just examine the registers used by the RTL, but
10674 otherwise we have to assume that whatever function is
10675 called might clobber anything, and so we have to save
10676 all the call-clobbered registers as well. */
10677 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
10678 /* FIQ handlers have registers r8 - r12 banked, so
10679 we only need to check r0 - r7; normal ISRs only
10680 bank r14 and r15, so we must check up to r12.
10681 r13 is the stack pointer which is always preserved,
10682 so we do not need to consider it here. */
10683 max_reg = 7;
10684 else
10685 max_reg = 12;
10687 for (reg = 0; reg <= max_reg; reg++)
10688 if (df_regs_ever_live_p (reg)
10689 || (! current_function_is_leaf && call_used_regs[reg]))
10690 save_reg_mask |= (1 << reg);
10692 /* Also save the pic base register if necessary. */
10693 if (flag_pic
10694 && !TARGET_SINGLE_PIC_BASE
10695 && arm_pic_register != INVALID_REGNUM
10696 && current_function_uses_pic_offset_table)
10697 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10699 else
10701 /* In arm mode we handle r11 (FP) as a special case. */
10702 unsigned last_reg = TARGET_ARM ? 10 : 11;
10704 /* In the normal case we only need to save those registers
10705 which are call saved and which are used by this function. */
10706 for (reg = 0; reg <= last_reg; reg++)
10707 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
10708 save_reg_mask |= (1 << reg);
10710 /* Handle the frame pointer as a special case. */
10711 if (! TARGET_APCS_FRAME
10712 && ! frame_pointer_needed
10713 && df_regs_ever_live_p (HARD_FRAME_POINTER_REGNUM)
10714 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
10715 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
10722 /* If we aren't loading the PIC register,
10723 don't stack it even though it may be live. */
10724 if (flag_pic
10725 && !TARGET_SINGLE_PIC_BASE
10726 && arm_pic_register != INVALID_REGNUM
10727 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
10728 || current_function_uses_pic_offset_table))
10729 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10731 /* The prologue will copy SP into R0, so save it. */
10732 if (IS_STACKALIGN (func_type))
10733 save_reg_mask |= 1;
10736 /* Save registers so the exception handler can modify them. */
10737 if (current_function_calls_eh_return)
10739 unsigned int i;
10741 for (i = 0; ; i++)
10743 reg = EH_RETURN_DATA_REGNO (i);
10744 if (reg == INVALID_REGNUM)
10745 break;
10746 save_reg_mask |= 1 << reg;
10750 return save_reg_mask;
10754 /* Compute a bit mask of which registers need to be
10755 saved on the stack for the current function. */
10757 static unsigned long
10758 arm_compute_save_reg_mask (void)
10760 unsigned int save_reg_mask = 0;
10761 unsigned long func_type = arm_current_func_type ();
10762 unsigned int reg;
10764 if (IS_NAKED (func_type))
10765 /* This should never really happen. */
10766 return 0;
10768 /* If we are creating a stack frame, then we must save the frame pointer,
10769 IP (which will hold the old stack pointer), LR and the PC. */
10770 if (frame_pointer_needed && TARGET_ARM)
10771 save_reg_mask |=
10772 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
10773 | (1 << IP_REGNUM)
10774 | (1 << LR_REGNUM)
10775 | (1 << PC_REGNUM);
10777 /* Volatile functions do not return, so there
10778 is no need to save any other registers. */
10779 if (IS_VOLATILE (func_type))
10780 return save_reg_mask;
10782 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
10784 /* Decide if we need to save the link register.
10785 Interrupt routines have their own banked link register,
10786 so they never need to save it.
10787 Otherwise if we do not use the link register we do not need to save
10788 it. If we are pushing other registers onto the stack however, we
10789 can save an instruction in the epilogue by pushing the link register
10790 now and then popping it back into the PC. This incurs extra memory
10791 accesses though, so we only do it when optimizing for size, and only
10792 if we know that we will not need a fancy return sequence. */
10793 if (df_regs_ever_live_p (LR_REGNUM)
10794 || (save_reg_mask
10795 && optimize_size
10796 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
10797 && !current_function_calls_eh_return))
10798 save_reg_mask |= 1 << LR_REGNUM;
10800 if (cfun->machine->lr_save_eliminated)
10801 save_reg_mask &= ~ (1 << LR_REGNUM);
10803 if (TARGET_REALLY_IWMMXT
10804 && ((bit_count (save_reg_mask)
10805 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
10807 /* The total number of registers that are going to be pushed
10808 onto the stack is odd. We need to ensure that the stack
10809 is 64-bit aligned before we start to save iWMMXt registers,
10810 and also before we start to create locals. (A local variable
10811 might be a double or long long which we will load/store using
10812 an iWMMXt instruction). Therefore we need to push another
10813 ARM register, so that the stack will be 64-bit aligned. We
10814 try to avoid using the arg registers (r0 - r3) as they might be
10815 used to pass values in a tail call. */
10816 for (reg = 4; reg <= 12; reg++)
10817 if ((save_reg_mask & (1 << reg)) == 0)
10818 break;
10820 if (reg <= 12)
10821 save_reg_mask |= (1 << reg);
10822 else
10824 cfun->machine->sibcall_blocked = 1;
10825 save_reg_mask |= (1 << 3);
10829 /* We may need to push an additional register for use initializing the
10830 PIC base register. */
10831 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
10832 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
10834 reg = thumb_find_work_register (1 << 4);
10835 if (!call_used_regs[reg])
10836 save_reg_mask |= (1 << reg);
10839 return save_reg_mask;
10843 /* Compute a bit mask of which registers need to be
10844 saved on the stack for the current function. */
10845 static unsigned long
10846 thumb1_compute_save_reg_mask (void)
10848 unsigned long mask;
10849 unsigned reg;
10851 mask = 0;
10852 for (reg = 0; reg < 12; reg ++)
10853 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
10854 mask |= 1 << reg;
10856 if (flag_pic
10857 && !TARGET_SINGLE_PIC_BASE
10858 && arm_pic_register != INVALID_REGNUM
10859 && current_function_uses_pic_offset_table)
10860 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
10862 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
10863 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
10864 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
10866 /* LR will also be pushed if any lo regs are pushed. */
10867 if (mask & 0xff || thumb_force_lr_save ())
10868 mask |= (1 << LR_REGNUM);
10870 /* Make sure we have a low work register if we need one.
10871 We will need one if we are going to push a high register,
10872 but we are not currently intending to push a low register. */
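/* Bits 0-7 of MASK cover the low registers r0-r7; bits 8-11 cover the
   high registers r8-r11.  */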
10873 if ((mask & 0xff) == 0
10874 && ((mask & 0x0f00) || TARGET_BACKTRACE))
10876 /* Use thumb_find_work_register to choose which register
10877 we will use. If the register is live then we will
10878 have to push it. Use LAST_LO_REGNUM as our fallback
10879 choice for the register to select. */
10880 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
10881 /* Make sure the register returned by thumb_find_work_register is
10882 not part of the return value. */
10883 if (reg * UNITS_PER_WORD <= arm_size_return_regs ())
10884 reg = LAST_LO_REGNUM;
10886 if (! call_used_regs[reg])
10887 mask |= 1 << reg;
10890 return mask;
10894 /* Return the number of bytes required to save VFP registers. */
10895 static int
10896 arm_get_vfp_saved_size (void)
10898 unsigned int regno;
10899 int count;
10900 int saved;
10902 saved = 0;
10903 /* Space for saved VFP registers. */
10904 if (TARGET_HARD_FLOAT && TARGET_VFP)
10906 count = 0;
10907 for (regno = FIRST_VFP_REGNUM;
10908 regno < LAST_VFP_REGNUM;
10909 regno += 2)
10911 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
10912 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
10914 if (count > 0)
10916 /* Workaround ARM10 VFPr1 bug. */
10917 if (count == 2 && !arm_arch6)
10918 count++;
10919 saved += count * 8;
10921 count = 0;
10923 else
10924 count++;
10926 if (count > 0)
10928 if (count == 2 && !arm_arch6)
10929 count++;
10930 saved += count * 8;
10933 return saved;
10937 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
10938 everything bar the final return instruction. */
10939 const char *
10940 output_return_instruction (rtx operand, int really_return, int reverse)
10942 char conditional[10];
10943 char instr[100];
10944 unsigned reg;
10945 unsigned long live_regs_mask;
10946 unsigned long func_type;
10947 arm_stack_offsets *offsets;
10949 func_type = arm_current_func_type ();
10951 if (IS_NAKED (func_type))
10952 return "";
10954 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
10956 /* If this function was declared non-returning, and we have
10957 found a tail call, then we have to trust that the called
10958 function won't return. */
10959 if (really_return)
10961 rtx ops[2];
10963 /* Otherwise, trap an attempted return by aborting. */
10964 ops[0] = operand;
10965 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
10966 : "abort");
10967 assemble_external_libcall (ops[1]);
10968 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
10971 return "";
10974 gcc_assert (!current_function_calls_alloca || really_return);
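/* CONDITIONAL expands to "%?%d0" (or "%?%D0" when REVERSE) and is
   appended to the mnemonics below to make them conditional on
   OPERAND.  */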
10976 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
10978 return_used_this_function = 1;
10980 live_regs_mask = arm_compute_save_reg_mask ();
10982 if (live_regs_mask)
10984 const char * return_reg;
10986 /* If we do not have any special requirements for function exit
10987 (e.g. interworking) then we can load the return address
10988 directly into the PC. Otherwise we must load it into LR. */
10989 if (really_return
10990 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
10991 return_reg = reg_names[PC_REGNUM];
10992 else
10993 return_reg = reg_names[LR_REGNUM];
10995 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
10997 /* There are three possible reasons for the IP register
10998 being saved. 1) a stack frame was created, in which case
10999 IP contains the old stack pointer, or 2) an ISR routine
11000 corrupted it, or 3) it was saved to align the stack on
11001 iWMMXt. In case 1, restore IP into SP, otherwise just
11002 restore IP. */
11003 if (frame_pointer_needed)
11005 live_regs_mask &= ~ (1 << IP_REGNUM);
11006 live_regs_mask |= (1 << SP_REGNUM);
11008 else
11009 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
11012 /* On some ARM architectures it is faster to use LDR rather than
11013 LDM to load a single register. On other architectures, the
11014 cost is the same. In 26 bit mode, or for exception handlers,
11015 we have to use LDM to load the PC so that the CPSR is also
11016 restored. */
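/* See whether exactly one register is to be restored; if so, REG will
   index it, otherwise the loop falls through with REG greater than
   LAST_ARM_REGNUM.  */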
11017 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11018 if (live_regs_mask == (1U << reg))
11019 break;
11021 if (reg <= LAST_ARM_REGNUM
11022 && (reg != LR_REGNUM
11023 || ! really_return
11024 || ! IS_INTERRUPT (func_type)))
11026 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
11027 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
11029 else
11031 char *p;
11032 int first = 1;
11034 /* Generate the load multiple instruction to restore the
11035 registers. Note we can get here, even if
11036 frame_pointer_needed is true, but only if sp already
11037 points to the base of the saved core registers. */
11038 if (live_regs_mask & (1 << SP_REGNUM))
11040 unsigned HOST_WIDE_INT stack_adjust;
11042 offsets = arm_get_frame_offsets ();
11043 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
11044 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
11046 if (stack_adjust && arm_arch5 && TARGET_ARM)
11047 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
11048 else
11050 /* If we can't use ldmib (SA110 bug),
11051 then try to pop r3 instead. */
11052 if (stack_adjust)
11053 live_regs_mask |= 1 << 3;
11054 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
11057 else
11058 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
11060 p = instr + strlen (instr);
11062 for (reg = 0; reg <= SP_REGNUM; reg++)
11063 if (live_regs_mask & (1 << reg))
11065 int l = strlen (reg_names[reg]);
11067 if (first)
11068 first = 0;
11069 else
11071 memcpy (p, ", ", 2);
11072 p += 2;
11075 memcpy (p, "%|", 2);
11076 memcpy (p + 2, reg_names[reg], l);
11077 p += l + 2;
11080 if (live_regs_mask & (1 << LR_REGNUM))
11082 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
11083 /* If returning from an interrupt, restore the CPSR. */
11084 if (IS_INTERRUPT (func_type))
11085 strcat (p, "^");
11087 else
11088 strcpy (p, "}");
11091 output_asm_insn (instr, & operand);
11093 /* See if we need to generate an extra instruction to
11094 perform the actual function return. */
11095 if (really_return
11096 && func_type != ARM_FT_INTERWORKED
11097 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
11099 /* The return has already been handled
11100 by loading the LR into the PC. */
11101 really_return = 0;
11105 if (really_return)
11107 switch ((int) ARM_FUNC_TYPE (func_type))
11109 case ARM_FT_ISR:
11110 case ARM_FT_FIQ:
11111 /* ??? This is wrong for unified assembly syntax. */
11112 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
11113 break;
11115 case ARM_FT_INTERWORKED:
11116 sprintf (instr, "bx%s\t%%|lr", conditional);
11117 break;
11119 case ARM_FT_EXCEPTION:
11120 /* ??? This is wrong for unified assembly syntax. */
11121 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
11122 break;
11124 default:
11125 /* Use bx if it's available. */
11126 if (arm_arch5 || arm_arch4t)
11127 sprintf (instr, "bx%s\t%%|lr", conditional);
11128 else
11129 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
11130 break;
11133 output_asm_insn (instr, & operand);
11136 return "";
11139 /* Write the function name into the code section, directly preceding
11140 the function prologue.
11142 Code will be output similar to this:
11144 .ascii "arm_poke_function_name", 0
11145 .align
11147 .word 0xff000000 + (t1 - t0)
11148 arm_poke_function_name
11149 mov ip, sp
11150 stmfd sp!, {fp, ip, lr, pc}
11151 sub fp, ip, #4
11153 When performing a stack backtrace, code can inspect the value
11154 of 'pc' stored at 'fp' + 0. If the trace function then looks
11155 at location pc - 12 and the top 8 bits are set, then we know
11156 that there is a function name embedded immediately preceding this
11157 location, and that its length is ((pc[-3]) & ~0xff000000).
11159 We assume that pc is declared as a pointer to an unsigned long.
11161 It is of no benefit to output the function name if we are assembling
11162 a leaf function. These function types will not contain a stack
11163 backtrace structure, therefore it is not possible to determine the
11164 function name. */
11165 void
11166 arm_poke_function_name (FILE *stream, const char *name)
11168 unsigned long alignlength;
11169 unsigned long length;
11170 rtx x;
11172 length = strlen (name) + 1;
11173 alignlength = ROUND_UP_WORD (length);
11175 ASM_OUTPUT_ASCII (stream, name, length);
11176 ASM_OUTPUT_ALIGN (stream, 2);
11177 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
11178 assemble_aligned_integer (UNITS_PER_WORD, x);
11181 /* Place some comments into the assembler stream
11182 describing the current function. */
11183 static void
11184 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
11186 unsigned long func_type;
11188 if (TARGET_THUMB1)
11190 thumb1_output_function_prologue (f, frame_size);
11191 return;
11194 /* Sanity check. */
11195 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
11197 func_type = arm_current_func_type ();
11199 switch ((int) ARM_FUNC_TYPE (func_type))
11201 default:
11202 case ARM_FT_NORMAL:
11203 break;
11204 case ARM_FT_INTERWORKED:
11205 asm_fprintf (f, "\t%@ Function supports interworking.\n");
11206 break;
11207 case ARM_FT_ISR:
11208 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
11209 break;
11210 case ARM_FT_FIQ:
11211 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
11212 break;
11213 case ARM_FT_EXCEPTION:
11214 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
11215 break;
11218 if (IS_NAKED (func_type))
11219 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
11221 if (IS_VOLATILE (func_type))
11222 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
11224 if (IS_NESTED (func_type))
11225 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
11226 if (IS_STACKALIGN (func_type))
11227 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
11229 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
11230 current_function_args_size,
11231 current_function_pretend_args_size, frame_size);
11233 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
11234 frame_pointer_needed,
11235 cfun->machine->uses_anonymous_args);
11237 if (cfun->machine->lr_save_eliminated)
11238 asm_fprintf (f, "\t%@ link register save eliminated.\n");
11240 if (current_function_calls_eh_return)
11241 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
11243 return_used_this_function = 0;
11246 const char *
11247 arm_output_epilogue (rtx sibling)
11249 int reg;
11250 unsigned long saved_regs_mask;
11251 unsigned long func_type;
11252 /* Floats_offset is the offset from the "virtual" frame. In an APCS
11253 frame that is $fp + 4 for a non-variadic function. */
11254 int floats_offset = 0;
11255 rtx operands[3];
11256 FILE * f = asm_out_file;
11257 unsigned int lrm_count = 0;
11258 int really_return = (sibling == NULL);
11259 int start_reg;
11260 arm_stack_offsets *offsets;
11262 /* If we have already generated the return instruction
11263 then it is futile to generate anything else. */
11264 if (use_return_insn (FALSE, sibling) && return_used_this_function)
11265 return "";
11267 func_type = arm_current_func_type ();
11269 if (IS_NAKED (func_type))
11270 /* Naked functions don't have epilogues. */
11271 return "";
11273 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
11275 rtx op;
11277 /* A volatile function should never return. Call abort. */
11278 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
11279 assemble_external_libcall (op);
11280 output_asm_insn ("bl\t%a0", &op);
11282 return "";
11285 /* If we are throwing an exception, then we really must be doing a
11286 return, so we can't tail-call. */
11287 gcc_assert (!current_function_calls_eh_return || really_return);
11289 offsets = arm_get_frame_offsets ();
11290 saved_regs_mask = arm_compute_save_reg_mask ();
11292 if (TARGET_IWMMXT)
11293 lrm_count = bit_count (saved_regs_mask);
11295 floats_offset = offsets->saved_args;
11296 /* Compute how far away the floats will be. */
11297 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
11298 if (saved_regs_mask & (1 << reg))
11299 floats_offset += 4;
11301 if (frame_pointer_needed && TARGET_ARM)
11303 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
11304 int vfp_offset = offsets->frame;
11306 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11308 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11309 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11311 floats_offset += 12;
11312 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
11313 reg, FP_REGNUM, floats_offset - vfp_offset);
11316 else
11318 start_reg = LAST_FPA_REGNUM;
11320 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
11322 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11324 floats_offset += 12;
11326 /* We can't unstack more than four registers at once. */
11327 if (start_reg - reg == 3)
11329 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
11330 reg, FP_REGNUM, floats_offset - vfp_offset);
11331 start_reg = reg - 1;
11334 else
11336 if (reg != start_reg)
11337 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11338 reg + 1, start_reg - reg,
11339 FP_REGNUM, floats_offset - vfp_offset);
11340 start_reg = reg - 1;
11344 /* Just in case the last register checked also needs unstacking. */
11345 if (reg != start_reg)
11346 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
11347 reg + 1, start_reg - reg,
11348 FP_REGNUM, floats_offset - vfp_offset);
11351 if (TARGET_HARD_FLOAT && TARGET_VFP)
11353 int saved_size;
11355 /* The fldmd insns do not have base+offset addressing
11356 modes, so we use IP to hold the address. */
11357 saved_size = arm_get_vfp_saved_size ();
11359 if (saved_size > 0)
11361 floats_offset += saved_size;
11362 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
11363 FP_REGNUM, floats_offset - vfp_offset);
11365 start_reg = FIRST_VFP_REGNUM;
11366 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11368 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11369 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11371 if (start_reg != reg)
11372 vfp_output_fldmd (f, IP_REGNUM,
11373 (start_reg - FIRST_VFP_REGNUM) / 2,
11374 (reg - start_reg) / 2);
11375 start_reg = reg + 2;
11378 if (start_reg != reg)
11379 vfp_output_fldmd (f, IP_REGNUM,
11380 (start_reg - FIRST_VFP_REGNUM) / 2,
11381 (reg - start_reg) / 2);
11384 if (TARGET_IWMMXT)
11386 /* The frame pointer is guaranteed to be non-double-word aligned.
11387 This is because it is set to (old_stack_pointer - 4) and the
11388 old_stack_pointer was double word aligned. Thus the offset to
11389 the iWMMXt registers to be loaded must also not be a multiple of
11390 eight, so that the resultant address *is* double-word aligned.
11391 We can ignore floats_offset since that was already included in
11392 the live_regs_mask. */
11393 lrm_count += (lrm_count % 2 ? 2 : 1);
11395 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
11396 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11398 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
11399 reg, FP_REGNUM, lrm_count * 4);
11400 lrm_count += 2;
11404 /* saved_regs_mask should contain the IP, which at the time of stack
11405 frame generation actually contains the old stack pointer. So a
11406 quick way to unwind the stack is just pop the IP register directly
11407 into the stack pointer. */
11408 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
11409 saved_regs_mask &= ~ (1 << IP_REGNUM);
11410 saved_regs_mask |= (1 << SP_REGNUM);
11412 /* There are two registers left in saved_regs_mask - LR and PC. We
11413 only need to restore the LR register (the return address), but to
11414 save time we can load it directly into the PC, unless we need a
11415 special function exit sequence, or we are not really returning. */
11416 if (really_return
11417 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
11418 && !current_function_calls_eh_return)
11419 /* Delete the LR from the register mask, so that the LR on
11420 the stack is loaded into the PC in the register mask. */
11421 saved_regs_mask &= ~ (1 << LR_REGNUM);
11422 else
11423 saved_regs_mask &= ~ (1 << PC_REGNUM);
11425 /* We must use SP as the base register, because SP is one of the
11426 registers being restored. If an interrupt or page fault
11427 happens in the ldm instruction, the SP might or might not
11428 have been restored. That would be bad, as then SP will no
11429 longer indicate the safe area of stack, and we can get stack
11430 corruption. Using SP as the base register means that it will
11431 be reset correctly to the original value, should an interrupt
11432 occur. If the stack pointer already points at the right
11433 place, then omit the subtraction. */
11434 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
11435 || current_function_calls_alloca)
11436 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
11437 4 * bit_count (saved_regs_mask));
11438 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask, 0);
11440 if (IS_INTERRUPT (func_type))
11441 /* Interrupt handlers will have pushed the
11442 IP onto the stack, so restore it now. */
11443 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, 1 << IP_REGNUM, 0);
11445 else
11447 HOST_WIDE_INT amount;
11448 int rfe;
11449 /* Restore stack pointer if necessary. */
11450 if (frame_pointer_needed)
11452 /* For Thumb-2 restore sp from the frame pointer.
11453 Operand restrictions mean we have to increment FP, then copy
11454 to SP. */
11455 amount = offsets->locals_base - offsets->saved_regs;
11456 operands[0] = hard_frame_pointer_rtx;
11458 else
11460 operands[0] = stack_pointer_rtx;
11461 amount = offsets->outgoing_args - offsets->saved_regs;
11464 if (amount)
11466 operands[1] = operands[0];
11467 operands[2] = GEN_INT (amount);
11468 output_add_immediate (operands);
11470 if (frame_pointer_needed)
11471 asm_fprintf (f, "\tmov\t%r, %r\n",
11472 SP_REGNUM, HARD_FRAME_POINTER_REGNUM);
11474 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
11476 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11477 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11478 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
11479 reg, SP_REGNUM);
11481 else
11483 start_reg = FIRST_FPA_REGNUM;
11485 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
11487 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11489 if (reg - start_reg == 3)
11491 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
11492 start_reg, SP_REGNUM);
11493 start_reg = reg + 1;
11496 else
11498 if (reg != start_reg)
11499 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11500 start_reg, reg - start_reg,
11501 SP_REGNUM);
11503 start_reg = reg + 1;
11507 /* Just in case the last register checked also needs unstacking. */
11508 if (reg != start_reg)
11509 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
11510 start_reg, reg - start_reg, SP_REGNUM);
11513 if (TARGET_HARD_FLOAT && TARGET_VFP)
11515 start_reg = FIRST_VFP_REGNUM;
11516 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
11518 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
11519 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
11521 if (start_reg != reg)
11522 vfp_output_fldmd (f, SP_REGNUM,
11523 (start_reg - FIRST_VFP_REGNUM) / 2,
11524 (reg - start_reg) / 2);
11525 start_reg = reg + 2;
11528 if (start_reg != reg)
11529 vfp_output_fldmd (f, SP_REGNUM,
11530 (start_reg - FIRST_VFP_REGNUM) / 2,
11531 (reg - start_reg) / 2);
11533 if (TARGET_IWMMXT)
11534 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
11535 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
11536 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
11538 /* If we can, restore the LR into the PC. */
11539 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
11540 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
11541 && !IS_STACKALIGN (func_type)
11542 && really_return
11543 && current_function_pretend_args_size == 0
11544 && saved_regs_mask & (1 << LR_REGNUM)
11545 && !current_function_calls_eh_return)
11547 saved_regs_mask &= ~ (1 << LR_REGNUM);
11548 saved_regs_mask |= (1 << PC_REGNUM);
11549 rfe = IS_INTERRUPT (func_type);
11551 else
11552 rfe = 0;
11554 /* Load the registers off the stack. If we only have one register
11555 to load use the LDR instruction - it is faster. For Thumb-2
11556 always use pop and the assembler will pick the best instruction. */
11557 if (TARGET_ARM && saved_regs_mask == (1 << LR_REGNUM)
11558 && !IS_INTERRUPT(func_type))
11560 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
11562 else if (saved_regs_mask)
11564 if (saved_regs_mask & (1 << SP_REGNUM))
11565 /* Note - write back to the stack register is not enabled
11566 (i.e. "ldmfd sp!..."). We know that the stack pointer is
11567 in the list of registers and if we add writeback the
11568 instruction becomes UNPREDICTABLE. */
11569 print_multi_reg (f, "ldmfd\t%r, ", SP_REGNUM, saved_regs_mask,
11570 rfe);
11571 else if (TARGET_ARM)
11572 print_multi_reg (f, "ldmfd\t%r!, ", SP_REGNUM, saved_regs_mask,
11573 rfe);
11574 else
11575 print_multi_reg (f, "pop\t", SP_REGNUM, saved_regs_mask, 0);
11578 if (current_function_pretend_args_size)
11580 /* Unwind the pre-pushed regs. */
11581 operands[0] = operands[1] = stack_pointer_rtx;
11582 operands[2] = GEN_INT (current_function_pretend_args_size);
11583 output_add_immediate (operands);
11587 /* We may have already restored PC directly from the stack. */
11588 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
11589 return "";
11591 /* Stack adjustment for exception handler. */
11592 if (current_function_calls_eh_return)
11593 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
11594 ARM_EH_STACKADJ_REGNUM);
11596 /* Generate the return instruction. */
11597 switch ((int) ARM_FUNC_TYPE (func_type))
11599 case ARM_FT_ISR:
11600 case ARM_FT_FIQ:
11601 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
11602 break;
11604 case ARM_FT_EXCEPTION:
11605 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11606 break;
11608 case ARM_FT_INTERWORKED:
11609 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11610 break;
11612 default:
11613 if (IS_STACKALIGN (func_type))
11615 /* See comment in arm_expand_prologue. */
11616 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, 0);
11618 if (arm_arch5 || arm_arch4t)
11619 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
11620 else
11621 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
11622 break;
11625 return "";
11628 static void
11629 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11630 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
11632 arm_stack_offsets *offsets;
11634 if (TARGET_THUMB1)
11636 int regno;
11638 /* Emit any call-via-reg trampolines that are needed for v4t support
11639 of call_reg and call_value_reg type insns. */
11640 for (regno = 0; regno < LR_REGNUM; regno++)
11642 rtx label = cfun->machine->call_via[regno];
11644 if (label != NULL)
11646 switch_to_section (function_section (current_function_decl));
11647 targetm.asm_out.internal_label (asm_out_file, "L",
11648 CODE_LABEL_NUMBER (label));
11649 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
11653 /* ??? Probably not safe to set this here, since it assumes that a
11654 function will be emitted as assembly immediately after we generate
11655 RTL for it. This does not happen for inline functions. */
11656 return_used_this_function = 0;
11658 else /* TARGET_32BIT */
11660 /* We need to take into account any stack-frame rounding. */
11661 offsets = arm_get_frame_offsets ();
11663 gcc_assert (!use_return_insn (FALSE, NULL)
11664 || !return_used_this_function
11665 || offsets->saved_regs == offsets->outgoing_args
11666 || frame_pointer_needed);
11668 /* Reset the ARM-specific per-function variables. */
11669 after_arm_reorg = 0;
11673 /* Generate and emit an insn that we will recognize as a push_multi.
11674 Unfortunately, since this insn does not reflect very well the actual
11675 semantics of the operation, we need to annotate the insn for the benefit
11676 of DWARF2 frame unwind information. */
11677 static rtx
11678 emit_multi_reg_push (unsigned long mask)
11680 int num_regs = 0;
11681 int num_dwarf_regs;
11682 int i, j;
11683 rtx par;
11684 rtx dwarf;
11685 int dwarf_par_index;
11686 rtx tmp, reg;
11688 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11689 if (mask & (1 << i))
11690 num_regs++;
11692 gcc_assert (num_regs && num_regs <= 16);
11694 /* We don't record the PC in the dwarf frame information. */
11695 num_dwarf_regs = num_regs;
11696 if (mask & (1 << PC_REGNUM))
11697 num_dwarf_regs--;
11699 /* For the body of the insn we are going to generate an UNSPEC in
11700 parallel with several USEs. This allows the insn to be recognized
11701 by the push_multi pattern in the arm.md file. The insn looks
11702 something like this:
11704 (parallel [
11705 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
11706 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
11707 (use (reg:SI 11 fp))
11708 (use (reg:SI 12 ip))
11709 (use (reg:SI 14 lr))
11710 (use (reg:SI 15 pc))
11713 For the frame note however, we try to be more explicit and actually
11714 show each register being stored into the stack frame, plus a (single)
11715 decrement of the stack pointer. We do it this way in order to be
11716 friendly to the stack unwinding code, which only wants to see a single
11717 stack decrement per instruction. The RTL we generate for the note looks
11718 something like this:
11720 (sequence [
11721 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
11722 (set (mem:SI (reg:SI sp)) (reg:SI r4))
11723 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
11724 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
11725 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
11728 This sequence is used both by the code to support stack unwinding for
11729 exception handlers and the code to generate dwarf2 frame debugging. */
11731 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
11732 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
11733 dwarf_par_index = 1;
11735 for (i = 0; i <= LAST_ARM_REGNUM; i++)
11737 if (mask & (1 << i))
11739 reg = gen_rtx_REG (SImode, i);
11741 XVECEXP (par, 0, 0)
11742 = gen_rtx_SET (VOIDmode,
11743 gen_frame_mem (BLKmode,
11744 gen_rtx_PRE_DEC (BLKmode,
11745 stack_pointer_rtx)),
11746 gen_rtx_UNSPEC (BLKmode,
11747 gen_rtvec (1, reg),
11748 UNSPEC_PUSH_MULT));
11750 if (i != PC_REGNUM)
11752 tmp = gen_rtx_SET (VOIDmode,
11753 gen_frame_mem (SImode, stack_pointer_rtx),
11754 reg);
11755 RTX_FRAME_RELATED_P (tmp) = 1;
11756 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
11757 dwarf_par_index++;
11760 break;
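/* Only the first pushed register forms the SET/UNSPEC above; the
   remaining registers are added to the PARALLEL as USEs below.  */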
11764 for (j = 1, i++; j < num_regs; i++)
11766 if (mask & (1 << i))
11768 reg = gen_rtx_REG (SImode, i);
11770 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
11772 if (i != PC_REGNUM)
11775 tmp = gen_rtx_SET (VOIDmode,
11776 gen_frame_mem (SImode,
11777 plus_constant (stack_pointer_rtx,
11778 4 * j)),
11779 reg);
11780 RTX_FRAME_RELATED_P (tmp) = 1;
11781 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
11784 j++;
11788 par = emit_insn (par);
11790 tmp = gen_rtx_SET (VOIDmode,
11791 stack_pointer_rtx,
11792 plus_constant (stack_pointer_rtx, -4 * num_regs));
11793 RTX_FRAME_RELATED_P (tmp) = 1;
11794 XVECEXP (dwarf, 0, 0) = tmp;
11796 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11797 REG_NOTES (par));
11798 return par;
11801 /* Calculate the size of the return value that is passed in registers. */
11802 static int
11803 arm_size_return_regs (void)
11805 enum machine_mode mode;
11807 if (current_function_return_rtx != 0)
11808 mode = GET_MODE (current_function_return_rtx);
11809 else
11810 mode = DECL_MODE (DECL_RESULT (current_function_decl));
11812 return GET_MODE_SIZE (mode);
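/* Generate and emit an SFM instruction pushing COUNT FPA registers,
   starting with BASE_REG, onto the stack.  As with emit_multi_reg_push,
   the insn is annotated with a REG_FRAME_RELATED_EXPR note describing
   the individual stores for the unwinder.  */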
11815 static rtx
11816 emit_sfm (int base_reg, int count)
11818 rtx par;
11819 rtx dwarf;
11820 rtx tmp, reg;
11821 int i;
11823 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
11824 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
11826 reg = gen_rtx_REG (XFmode, base_reg++);
11828 XVECEXP (par, 0, 0)
11829 = gen_rtx_SET (VOIDmode,
11830 gen_frame_mem (BLKmode,
11831 gen_rtx_PRE_DEC (BLKmode,
11832 stack_pointer_rtx)),
11833 gen_rtx_UNSPEC (BLKmode,
11834 gen_rtvec (1, reg),
11835 UNSPEC_PUSH_MULT));
11836 tmp = gen_rtx_SET (VOIDmode,
11837 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
11838 RTX_FRAME_RELATED_P (tmp) = 1;
11839 XVECEXP (dwarf, 0, 1) = tmp;
11841 for (i = 1; i < count; i++)
11843 reg = gen_rtx_REG (XFmode, base_reg++);
11844 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
11846 tmp = gen_rtx_SET (VOIDmode,
11847 gen_frame_mem (XFmode,
11848 plus_constant (stack_pointer_rtx,
11849 i * 12)),
11850 reg);
11851 RTX_FRAME_RELATED_P (tmp) = 1;
11852 XVECEXP (dwarf, 0, i + 1) = tmp;
11855 tmp = gen_rtx_SET (VOIDmode,
11856 stack_pointer_rtx,
11857 plus_constant (stack_pointer_rtx, -12 * count));
11859 RTX_FRAME_RELATED_P (tmp) = 1;
11860 XVECEXP (dwarf, 0, 0) = tmp;
11862 par = emit_insn (par);
11863 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
11864 REG_NOTES (par));
11865 return par;
11869 /* Return true if the current function needs to save/restore LR. */
11871 static bool
11872 thumb_force_lr_save (void)
11874 return !cfun->machine->lr_save_eliminated
11875 && (!leaf_function_p ()
11876 || thumb_far_jump_used_p ()
11877 || df_regs_ever_live_p (LR_REGNUM));
11881 /* Compute the distance from register FROM to register TO.
11882 These can be the arg pointer (26), the soft frame pointer (25),
11883 the stack pointer (13) or the hard frame pointer (11).
11884 In thumb mode r7 is used as the soft frame pointer, if needed.
11885 Typical stack layout looks like this:
11887 old stack pointer -> | |
11888 ----
11889 | | \
11890 | | saved arguments for
11891 | | vararg functions
11892 | | /
11894 hard FP & arg pointer -> | | \
11895 | | stack
11896 | | frame
11897 | | /
11899 | | \
11900 | | call saved
11901 | | registers
11902 soft frame pointer -> | | /
11904 | | \
11905 | | local
11906 | | variables
11907 locals base pointer -> | | /
11909 | | \
11910 | | outgoing
11911 | | arguments
11912 current stack pointer -> | | /
11915 For a given function some or all of these stack components
11916 may not be needed, giving rise to the possibility of
11917 eliminating some of the registers.
11919 The values returned by this function must reflect the behavior
11920 of arm_expand_prologue() and arm_compute_save_reg_mask().
11922 The sign of the number returned reflects the direction of stack
11923 growth, so the values are positive for all eliminations except
11924 from the soft frame pointer to the hard frame pointer.
11926 SFP may point just inside the local variables block to ensure correct
11927 alignment. */
11930 /* Calculate stack offsets. These are used to calculate register elimination
11931 offsets and in prologue/epilogue code. */
11933 static arm_stack_offsets *
11934 arm_get_frame_offsets (void)
11936 struct arm_stack_offsets *offsets;
11937 unsigned long func_type;
11938 int leaf;
11939 int saved;
11940 HOST_WIDE_INT frame_size;
11942 offsets = &cfun->machine->stack_offsets;
11944 /* We need to know if we are a leaf function. Unfortunately, it
11945 is possible to be called after start_sequence has been called,
11946 which causes get_insns to return the insns for the sequence,
11947 not the function, which will cause leaf_function_p to return
11948 the incorrect result.
11950 However, we only need to know about leaf functions once reload has completed, and the
11951 frame size cannot be changed after that time, so we can safely
11952 use the cached value. */
11954 if (reload_completed)
11955 return offsets;
11957 /* Initially this is the size of the local variables. It will be translated
11958 into an offset once we have determined the size of preceding data. */
11959 frame_size = ROUND_UP_WORD (get_frame_size ());
11961 leaf = leaf_function_p ();
11963 /* Space for variadic functions. */
11964 offsets->saved_args = current_function_pretend_args_size;
11966 /* In Thumb mode this is incorrect, but never used. */
11967 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
11969 if (TARGET_32BIT)
11971 unsigned int regno;
11973 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
11975 /* We know that SP will be doubleword aligned on entry, and we must
11976 preserve that condition at any subroutine call. We also require the
11977 soft frame pointer to be doubleword aligned. */
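/* For example, if saved_args is 0 and five core registers are pushed
   (20 bytes), then, assuming no caller-interworking slot, soft_frame below
   starts at 20 and the ARM_DOUBLEWORD_ALIGN fixup bumps it to 24 to keep
   it 8-byte aligned.  */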
11979 if (TARGET_REALLY_IWMMXT)
11981 /* Check for the call-saved iWMMXt registers. */
11982 for (regno = FIRST_IWMMXT_REGNUM;
11983 regno <= LAST_IWMMXT_REGNUM;
11984 regno++)
11985 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
11986 saved += 8;
11989 func_type = arm_current_func_type ();
11990 if (! IS_VOLATILE (func_type))
11992 /* Space for saved FPA registers. */
11993 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
11994 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
11995 saved += 12;
11997 /* Space for saved VFP registers. */
11998 if (TARGET_HARD_FLOAT && TARGET_VFP)
11999 saved += arm_get_vfp_saved_size ();
12002 else /* TARGET_THUMB1 */
12004 saved = bit_count (thumb1_compute_save_reg_mask ()) * 4;
12005 if (TARGET_BACKTRACE)
12006 saved += 16;
12009 /* Saved registers include the stack frame. */
12010 offsets->saved_regs = offsets->saved_args + saved;
12011 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
12012 /* A leaf function does not need any stack alignment if it has nothing
12013 on the stack. */
12014 if (leaf && frame_size == 0)
12016 offsets->outgoing_args = offsets->soft_frame;
12017 offsets->locals_base = offsets->soft_frame;
12018 return offsets;
12021 /* Ensure SFP has the correct alignment. */
12022 if (ARM_DOUBLEWORD_ALIGN
12023 && (offsets->soft_frame & 7))
12024 offsets->soft_frame += 4;
12026 offsets->locals_base = offsets->soft_frame + frame_size;
12027 offsets->outgoing_args = (offsets->locals_base
12028 + current_function_outgoing_args_size);
12030 if (ARM_DOUBLEWORD_ALIGN)
12032 /* Ensure SP remains doubleword aligned. */
12033 if (offsets->outgoing_args & 7)
12034 offsets->outgoing_args += 4;
12035 gcc_assert (!(offsets->outgoing_args & 7));
12038 return offsets;
12042 /* Calculate the relative offsets for the different stack pointers. Positive
12043 offsets are in the direction of stack growth. */
12045 HOST_WIDE_INT
12046 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
12048 arm_stack_offsets *offsets;
12050 offsets = arm_get_frame_offsets ();
12052 /* OK, now we have enough information to compute the distances.
12053 There must be an entry in these switch tables for each pair
12054 of registers in ELIMINABLE_REGS, even if some of the entries
12055 seem to be redundant or useless. */
12056 switch (from)
12058 case ARG_POINTER_REGNUM:
12059 switch (to)
12061 case THUMB_HARD_FRAME_POINTER_REGNUM:
12062 return 0;
12064 case FRAME_POINTER_REGNUM:
12065 /* This is the reverse of the soft frame pointer
12066 to hard frame pointer elimination below. */
12067 return offsets->soft_frame - offsets->saved_args;
12069 case ARM_HARD_FRAME_POINTER_REGNUM:
12070 /* If there is no stack frame then the hard
12071 frame pointer and the arg pointer coincide. */
12072 if (offsets->frame == offsets->saved_regs)
12073 return 0;
12074 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
12075 return (frame_pointer_needed
12076 && cfun->static_chain_decl != NULL
12077 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
12079 case STACK_POINTER_REGNUM:
12080 /* If nothing has been pushed on the stack at all
12081 then this will return -4. This *is* correct! */
12082 return offsets->outgoing_args - (offsets->saved_args + 4);
12084 default:
12085 gcc_unreachable ();
12087 gcc_unreachable ();
12089 case FRAME_POINTER_REGNUM:
12090 switch (to)
12092 case THUMB_HARD_FRAME_POINTER_REGNUM:
12093 return 0;
12095 case ARM_HARD_FRAME_POINTER_REGNUM:
12096 /* The hard frame pointer points to the top entry in the
12097 stack frame. The soft frame pointer to the bottom entry
12098 in the stack frame. If there is no stack frame at all,
12099 then they are identical. */
12101 return offsets->frame - offsets->soft_frame;
12103 case STACK_POINTER_REGNUM:
12104 return offsets->outgoing_args - offsets->soft_frame;
12106 default:
12107 gcc_unreachable ();
12109 gcc_unreachable ();
12111 default:
12112 /* You cannot eliminate from the stack pointer.
12113 In theory you could eliminate from the hard frame
12114 pointer to the stack pointer, but this will never
12115 happen, since if a stack frame is not needed the
12116 hard frame pointer will never be used. */
12117 gcc_unreachable ();
12122 /* Emit RTL to save coprocessor registers on function entry. Returns the
12123 number of bytes pushed. */
12125 static int
12126 arm_save_coproc_regs(void)
12128 int saved_size = 0;
12129 unsigned reg;
12130 unsigned start_reg;
12131 rtx insn;
12133 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
12134 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
12136 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
12137 insn = gen_rtx_MEM (V2SImode, insn);
12138 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
12139 RTX_FRAME_RELATED_P (insn) = 1;
12140 saved_size += 8;
12143 /* Save any floating point call-saved registers used by this
12144 function. */
12145 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
12147 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12148 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12150 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
12151 insn = gen_rtx_MEM (XFmode, insn);
12152 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
12153 RTX_FRAME_RELATED_P (insn) = 1;
12154 saved_size += 12;
12157 else
12159 start_reg = LAST_FPA_REGNUM;
12161 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
12163 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
12165 if (start_reg - reg == 3)
12167 insn = emit_sfm (reg, 4);
12168 RTX_FRAME_RELATED_P (insn) = 1;
12169 saved_size += 48;
12170 start_reg = reg - 1;
12173 else
12175 if (start_reg != reg)
12177 insn = emit_sfm (reg + 1, start_reg - reg);
12178 RTX_FRAME_RELATED_P (insn) = 1;
12179 saved_size += (start_reg - reg) * 12;
12181 start_reg = reg - 1;
12185 if (start_reg != reg)
12187 insn = emit_sfm (reg + 1, start_reg - reg);
12188 saved_size += (start_reg - reg) * 12;
12189 RTX_FRAME_RELATED_P (insn) = 1;
12192 if (TARGET_HARD_FLOAT && TARGET_VFP)
12194 start_reg = FIRST_VFP_REGNUM;
12196 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
12198 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
12199 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
12201 if (start_reg != reg)
12202 saved_size += vfp_emit_fstmd (start_reg,
12203 (reg - start_reg) / 2);
12204 start_reg = reg + 2;
12207 if (start_reg != reg)
12208 saved_size += vfp_emit_fstmd (start_reg,
12209 (reg - start_reg) / 2);
12211 return saved_size;
12215 /* Set the Thumb frame pointer from the stack pointer. */
12217 static void
12218 thumb_set_frame_pointer (arm_stack_offsets *offsets)
12220 HOST_WIDE_INT amount;
12221 rtx insn, dwarf;
12223 amount = offsets->outgoing_args - offsets->locals_base;
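/* For small offsets the frame pointer is set with a single SP-plus-immediate
   add; for larger offsets the constant is loaded into the frame pointer
   first and SP added to it, with a REG_FRAME_RELATED_EXPR note describing
   the net FP = SP + amount effect for the unwinder.  */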
12224 if (amount < 1024)
12225 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12226 stack_pointer_rtx, GEN_INT (amount)));
12227 else
12229 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
12230 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
12231 hard_frame_pointer_rtx,
12232 stack_pointer_rtx));
12233 dwarf = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
12234 plus_constant (stack_pointer_rtx, amount));
12235 RTX_FRAME_RELATED_P (dwarf) = 1;
12236 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
12237 REG_NOTES (insn));
12240 RTX_FRAME_RELATED_P (insn) = 1;
12243 /* Generate the prologue instructions for entry into an ARM or Thumb-2
12244 function. */
12245 void
12246 arm_expand_prologue (void)
12248 rtx amount;
12249 rtx insn;
12250 rtx ip_rtx;
12251 unsigned long live_regs_mask;
12252 unsigned long func_type;
12253 int fp_offset = 0;
12254 int saved_pretend_args = 0;
12255 int saved_regs = 0;
12256 unsigned HOST_WIDE_INT args_to_push;
12257 arm_stack_offsets *offsets;
12259 func_type = arm_current_func_type ();
12261 /* Naked functions don't have prologues. */
12262 if (IS_NAKED (func_type))
12263 return;
12265 /* Make a copy of current_function_pretend_args_size as we may need to modify it locally. */
12266 args_to_push = current_function_pretend_args_size;
12268 /* Compute which registers we will have to save onto the stack. */
12269 live_regs_mask = arm_compute_save_reg_mask ();
12271 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
12273 if (IS_STACKALIGN (func_type))
12275 rtx dwarf;
12276 rtx r0;
12277 rtx r1;
12278 /* Handle a word-aligned stack pointer. We generate the following:
12280 mov r0, sp
12281 bic r1, r0, #7
12282 mov sp, r1
12283 <save and restore r0 in normal prologue/epilogue>
12284 mov sp, r0
12285 bx lr
12287 The unwinder doesn't need to know about the stack realignment.
12288 Just tell it we saved SP in r0. */
12289 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
12291 r0 = gen_rtx_REG (SImode, 0);
12292 r1 = gen_rtx_REG (SImode, 1);
12293 dwarf = gen_rtx_UNSPEC (SImode, NULL_RTVEC, UNSPEC_STACK_ALIGN);
12294 dwarf = gen_rtx_SET (VOIDmode, r0, dwarf);
12295 insn = gen_movsi (r0, stack_pointer_rtx);
12296 RTX_FRAME_RELATED_P (insn) = 1;
12297 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12298 dwarf, REG_NOTES (insn));
12299 emit_insn (insn);
12300 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
12301 emit_insn (gen_movsi (stack_pointer_rtx, r1));
12304 if (frame_pointer_needed && TARGET_ARM)
12306 if (IS_INTERRUPT (func_type))
12308 /* Interrupt functions must not corrupt any registers.
12309 Creating a frame pointer however, corrupts the IP
12310 register, so we must push it first. */
12311 insn = emit_multi_reg_push (1 << IP_REGNUM);
12313 /* Do not set RTX_FRAME_RELATED_P on this insn.
12314 The dwarf stack unwinding code only wants to see one
12315 stack decrement per function, and this is not it. If
12316 this instruction is labeled as being part of the frame
12317 creation sequence then dwarf2out_frame_debug_expr will
12318 die when it encounters the assignment of IP to FP
12319 later on, since the use of SP here establishes SP as
12320 the CFA register and not IP.
12322 Anyway this instruction is not really part of the stack
12323 frame creation although it is part of the prologue. */
12325 else if (IS_NESTED (func_type))
12327 /* The static chain register is the same as the IP register
12328 used as a scratch register during stack frame creation.
12329 To get around this we need to find somewhere to store IP
12330 whilst the frame is being created. We try the following
12331 places in order:
12333 1. The last argument register.
12334 2. A slot on the stack above the frame. (This only
12335 works if the function is not a varargs function).
12336 3. Register r3, after pushing the argument registers
12337 onto the stack.
12339 Note - we only need to tell the dwarf2 backend about the SP
12340 adjustment in the second variant; the static chain register
12341 doesn't need to be unwound, as it doesn't contain a value
12342 inherited from the caller. */
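/* These three cases correspond, in order, to the !df_regs_ever_live_p (3),
   args_to_push == 0, and final else branches below.  */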
12344 if (df_regs_ever_live_p (3) == false)
12345 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12346 else if (args_to_push == 0)
12348 rtx dwarf;
12350 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
12351 insn = emit_set_insn (gen_frame_mem (SImode, insn), ip_rtx);
12352 fp_offset = 4;
12354 /* Just tell the dwarf backend that we adjusted SP. */
12355 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
12356 plus_constant (stack_pointer_rtx,
12357 -fp_offset));
12358 RTX_FRAME_RELATED_P (insn) = 1;
12359 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
12360 dwarf, REG_NOTES (insn));
12362 else
12364 /* Store the args on the stack. */
12365 if (cfun->machine->uses_anonymous_args)
12366 insn = emit_multi_reg_push
12367 ((0xf0 >> (args_to_push / 4)) & 0xf);
12368 else
12369 insn = emit_insn
12370 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12371 GEN_INT (- args_to_push)));
12373 RTX_FRAME_RELATED_P (insn) = 1;
12375 saved_pretend_args = 1;
12376 fp_offset = args_to_push;
12377 args_to_push = 0;
12379 /* Now reuse r3 to preserve IP. */
12380 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
12384 insn = emit_set_insn (ip_rtx,
12385 plus_constant (stack_pointer_rtx, fp_offset));
12386 RTX_FRAME_RELATED_P (insn) = 1;
12389 if (args_to_push)
12391 /* Push the argument registers, or reserve space for them. */
12392 if (cfun->machine->uses_anonymous_args)
12393 insn = emit_multi_reg_push
12394 ((0xf0 >> (args_to_push / 4)) & 0xf);
12395 else
12396 insn = emit_insn
12397 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12398 GEN_INT (- args_to_push)));
12399 RTX_FRAME_RELATED_P (insn) = 1;
12402 /* If this is an interrupt service routine, and the link register
12403 is going to be pushed, and we are not creating a stack frame
12404 (which would involve an extra push of IP and a pop in the epilogue),
12405 then subtracting four from LR now will mean that the function return
12406 can be done with a single instruction. */
12407 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
12408 && (live_regs_mask & (1 << LR_REGNUM)) != 0
12409 && ! frame_pointer_needed
12410 && TARGET_ARM)
12412 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
12414 emit_set_insn (lr, plus_constant (lr, -4));
12417 if (live_regs_mask)
12419 insn = emit_multi_reg_push (live_regs_mask);
12420 saved_regs += bit_count (live_regs_mask) * 4;
12421 RTX_FRAME_RELATED_P (insn) = 1;
12424 if (! IS_VOLATILE (func_type))
12425 saved_regs += arm_save_coproc_regs ();
12427 if (frame_pointer_needed && TARGET_ARM)
12429 /* Create the new frame pointer. */
12431 insn = GEN_INT (-(4 + args_to_push + fp_offset));
12432 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
12433 RTX_FRAME_RELATED_P (insn) = 1;
12435 if (IS_NESTED (func_type))
12437 /* Recover the static chain register. */
12438 if (!df_regs_ever_live_p (3)
12439 || saved_pretend_args)
12440 insn = gen_rtx_REG (SImode, 3);
12441 else /* if (current_function_pretend_args_size == 0) */
12443 insn = plus_constant (hard_frame_pointer_rtx, 4);
12444 insn = gen_frame_mem (SImode, insn);
12446 emit_set_insn (ip_rtx, insn);
12447 /* Add a USE to stop propagate_one_insn() from barfing. */
12448 emit_insn (gen_prologue_use (ip_rtx));
12453 offsets = arm_get_frame_offsets ();
12454 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
12456 /* This add can produce multiple insns for a large constant, so we
12457 need to get tricky. */
12458 rtx last = get_last_insn ();
12460 amount = GEN_INT (offsets->saved_args + saved_regs
12461 - offsets->outgoing_args);
12463 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
12464 amount));
12467 last = last ? NEXT_INSN (last) : get_insns ();
12468 RTX_FRAME_RELATED_P (last) = 1;
12470 while (last != insn);
12472 /* If the frame pointer is needed, emit a special barrier that
12473 will prevent the scheduler from moving stores to the frame
12474 before the stack adjustment. */
12475 if (frame_pointer_needed)
12476 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
12477 hard_frame_pointer_rtx));
12481 if (frame_pointer_needed && TARGET_THUMB2)
12482 thumb_set_frame_pointer (offsets);
12484 if (flag_pic && arm_pic_register != INVALID_REGNUM)
12486 unsigned long mask;
12488 mask = live_regs_mask;
12489 mask &= THUMB2_WORK_REGS;
12490 if (!IS_NESTED (func_type))
12491 mask |= (1 << IP_REGNUM);
12492 arm_load_pic_register (mask);
12495 /* If we are profiling, make sure no instructions are scheduled before
12496 the call to mcount. Similarly if the user has requested no
12497 scheduling in the prologue. Similarly if we want non-call exceptions
12498 using the EABI unwinder, to prevent faulting instructions from being
12499 swapped with a stack adjustment. */
12500 if (current_function_profile || !TARGET_SCHED_PROLOG
12501 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
12502 emit_insn (gen_blockage ());
12504 /* If the link register is being kept alive, with the return address in it,
12505 then make sure that it does not get reused by the ce2 pass. */
12506 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
12507 cfun->machine->lr_save_eliminated = 1;
12510 /* Print condition code to STREAM. Helper function for arm_print_operand. */
12511 static void
12512 arm_print_condition (FILE *stream)
12514 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
12516 /* Branch conversion is not implemented for Thumb-2. */
12517 if (TARGET_THUMB)
12519 output_operand_lossage ("predicated Thumb instruction");
12520 return;
12522 if (current_insn_predicate != NULL)
12524 output_operand_lossage
12525 ("predicated instruction in conditional sequence");
12526 return;
12529 fputs (arm_condition_codes[arm_current_cc], stream);
12531 else if (current_insn_predicate)
12533 enum arm_cond_code code;
12535 if (TARGET_THUMB1)
12537 output_operand_lossage ("predicated Thumb instruction");
12538 return;
12541 code = get_arm_condition_code (current_insn_predicate);
12542 fputs (arm_condition_codes[code], stream);
12547 /* If CODE is 'd', then X is a condition operand and the instruction
12548 should only be executed if the condition is true.
12549 If CODE is 'D', then X is a condition operand and the instruction
12550 should only be executed if the condition is false: however, if the mode
12551 of the comparison is CCFPEmode, then always execute the instruction -- we
12552 do this because in these circumstances !GE does not necessarily imply LT;
12553 in these cases the instruction pattern will take care to make sure that
12554 an instruction containing %d will follow, thereby undoing the effects of
12555 doing this instruction unconditionally.
12556 If CODE is 'N' then X is a floating point operand that must be negated
12557 before output.
12558 If CODE is 'B' then output a bitwise inverted value of X (a const int).
12559 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
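/* For example, %M applied to (reg:DI r4) prints "{r4-r5}", and %B applied
   to (const_int 0) prints -1.  */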
12560 void
12561 arm_print_operand (FILE *stream, rtx x, int code)
12563 switch (code)
12565 case '@':
12566 fputs (ASM_COMMENT_START, stream);
12567 return;
12569 case '_':
12570 fputs (user_label_prefix, stream);
12571 return;
12573 case '|':
12574 fputs (REGISTER_PREFIX, stream);
12575 return;
12577 case '?':
12578 arm_print_condition (stream);
12579 return;
12581 case '(':
12582 /* Nothing in unified syntax, otherwise the current condition code. */
12583 if (!TARGET_UNIFIED_ASM)
12584 arm_print_condition (stream);
12585 break;
12587 case ')':
12588 /* The current condition code in unified syntax, otherwise nothing. */
12589 if (TARGET_UNIFIED_ASM)
12590 arm_print_condition (stream);
12591 break;
12593 case '.':
12594 /* The current condition code for a condition code setting instruction.
12595 Preceded by 's' in unified syntax, otherwise followed by 's'. */
12596 if (TARGET_UNIFIED_ASM)
12598 fputc('s', stream);
12599 arm_print_condition (stream);
12601 else
12603 arm_print_condition (stream);
12604 fputc('s', stream);
12606 return;
12608 case '!':
12609 /* If the instruction is conditionally executed then print
12610 the current condition code, otherwise print 's'. */
12611 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
12612 if (current_insn_predicate)
12613 arm_print_condition (stream);
12614 else
12615 fputc('s', stream);
12616 break;
12618 /* %# is a "break" sequence. It doesn't output anything, but is used to
12619 separate e.g. operand numbers from following text, if that text consists
12620 of further digits which we don't want to be part of the operand
12621 number. */
12622 case '#':
12623 return;
12625 case 'N':
12627 REAL_VALUE_TYPE r;
12628 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12629 r = REAL_VALUE_NEGATE (r);
12630 fprintf (stream, "%s", fp_const_from_val (&r));
12632 return;
12634 /* An integer without a preceding # sign. */
12635 case 'c':
12636 gcc_assert (GET_CODE (x) == CONST_INT);
12637 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12638 return;
12640 case 'B':
12641 if (GET_CODE (x) == CONST_INT)
12643 HOST_WIDE_INT val;
12644 val = ARM_SIGN_EXTEND (~INTVAL (x));
12645 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
12647 else
12649 putc ('~', stream);
12650 output_addr_const (stream, x);
12652 return;
12654 case 'L':
12655 /* The low 16 bits of an immediate constant. */
12656 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
12657 return;
12659 case 'i':
12660 fprintf (stream, "%s", arithmetic_instr (x, 1));
12661 return;
12663 /* Truncate Cirrus shift counts. */
12664 case 's':
12665 if (GET_CODE (x) == CONST_INT)
12667 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
12668 return;
12670 arm_print_operand (stream, x, 0);
12671 return;
12673 case 'I':
12674 fprintf (stream, "%s", arithmetic_instr (x, 0));
12675 return;
12677 case 'S':
12679 HOST_WIDE_INT val;
12680 const char *shift;
12682 if (!shift_operator (x, SImode))
12684 output_operand_lossage ("invalid shift operand");
12685 break;
12688 shift = shift_op (x, &val);
12690 if (shift)
12692 fprintf (stream, ", %s ", shift);
12693 if (val == -1)
12694 arm_print_operand (stream, XEXP (x, 1), 0);
12695 else
12696 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
12699 return;
12701 /* An explanation of the 'Q', 'R' and 'H' register operands:
12703 In a pair of registers containing a DI or DF value the 'Q'
12704 operand returns the register number of the register containing
12705 the least significant part of the value. The 'R' operand returns
12706 the register number of the register containing the most
12707 significant part of the value.
12709 The 'H' operand returns the higher of the two register numbers.
12710 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
12711 same as the 'Q' operand, since the most significant part of the
12712 value is held in the lower number register. The reverse is true
12713 on systems where WORDS_BIG_ENDIAN is false.
12715 The purpose of these operands is to distinguish between cases
12716 where the endian-ness of the values is important (for example
12717 when they are added together), and cases where the endian-ness
12718 is irrelevant, but the order of register operations is important.
12719 For example when loading a value from memory into a register
12720 pair, the endian-ness does not matter. Provided that the value
12721 from the lower memory address is put into the lower numbered
12722 register, and the value from the higher address is put into the
12723 higher numbered register, the load will work regardless of whether
12724 the value being loaded is big-wordian or little-wordian. The
12725 order of the two register loads can matter however, if the address
12726 of the memory location is actually held in one of the registers
12727 being overwritten by the load. */
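/* For example, with a DImode value in {r0, r1} on a target where
   WORDS_BIG_ENDIAN is false, %Q prints r0, %R prints r1 and %H prints r1;
   when WORDS_BIG_ENDIAN is true, %Q and %H both print r1 while %R
   prints r0.  */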
12728 case 'Q':
12729 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12731 output_operand_lossage ("invalid operand for code '%c'", code);
12732 return;
12735 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
12736 return;
12738 case 'R':
12739 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12741 output_operand_lossage ("invalid operand for code '%c'", code);
12742 return;
12745 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
12746 return;
12748 case 'H':
12749 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12751 output_operand_lossage ("invalid operand for code '%c'", code);
12752 return;
12755 asm_fprintf (stream, "%r", REGNO (x) + 1);
12756 return;
12758 case 'J':
12759 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12761 output_operand_lossage ("invalid operand for code '%c'", code);
12762 return;
12765 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
12766 return;
12768 case 'K':
12769 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
12771 output_operand_lossage ("invalid operand for code '%c'", code);
12772 return;
12775 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
12776 return;
12778 case 'm':
12779 asm_fprintf (stream, "%r",
12780 GET_CODE (XEXP (x, 0)) == REG
12781 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
12782 return;
12784 case 'M':
12785 asm_fprintf (stream, "{%r-%r}",
12786 REGNO (x),
12787 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
12788 return;
12790 /* Like 'M', but writing doubleword vector registers, for use by Neon
12791 insns. */
12792 case 'h':
12794 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
12795 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
12796 if (numregs == 1)
12797 asm_fprintf (stream, "{d%d}", regno);
12798 else
12799 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
12801 return;
12803 case 'd':
12804 /* CONST_TRUE_RTX means always -- that's the default. */
12805 if (x == const_true_rtx)
12806 return;
12808 if (!COMPARISON_P (x))
12810 output_operand_lossage ("invalid operand for code '%c'", code);
12811 return;
12814 fputs (arm_condition_codes[get_arm_condition_code (x)],
12815 stream);
12816 return;
12818 case 'D':
12819 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
12820 want to do that. */
12821 if (x == const_true_rtx)
12823 output_operand_lossage ("instruction never executed");
12824 return;
12826 if (!COMPARISON_P (x))
12828 output_operand_lossage ("invalid operand for code '%c'", code);
12829 return;
12832 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
12833 (get_arm_condition_code (x))],
12834 stream);
12835 return;
12837 /* Cirrus registers can be accessed in a variety of ways:
12838 single floating point (f)
12839 double floating point (d)
12840 32bit integer (fx)
12841 64bit integer (dx). */
12842 case 'W': /* Cirrus register in F mode. */
12843 case 'X': /* Cirrus register in D mode. */
12844 case 'Y': /* Cirrus register in FX mode. */
12845 case 'Z': /* Cirrus register in DX mode. */
12846 gcc_assert (GET_CODE (x) == REG
12847 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
12849 fprintf (stream, "mv%s%s",
12850 code == 'W' ? "f"
12851 : code == 'X' ? "d"
12852 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
12854 return;
12856 /* Print a Cirrus register in the format selected by the register's mode. */
12857 case 'V':
12859 int mode = GET_MODE (x);
12861 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
12863 output_operand_lossage ("invalid operand for code '%c'", code);
12864 return;
12867 fprintf (stream, "mv%s%s",
12868 mode == DFmode ? "d"
12869 : mode == SImode ? "fx"
12870 : mode == DImode ? "dx"
12871 : "f", reg_names[REGNO (x)] + 2);
12873 return;
12876 case 'U':
12877 if (GET_CODE (x) != REG
12878 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
12879 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
12880 /* Bad value for wCG register number. */
12882 output_operand_lossage ("invalid operand for code '%c'", code);
12883 return;
12886 else
12887 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
12888 return;
12890 /* Print an iWMMXt control register name. */
12891 case 'w':
12892 if (GET_CODE (x) != CONST_INT
12893 || INTVAL (x) < 0
12894 || INTVAL (x) >= 16)
12895 /* Bad value for wC register number. */
12897 output_operand_lossage ("invalid operand for code '%c'", code);
12898 return;
12901 else
12903 static const char * wc_reg_names [16] =
12905 "wCID", "wCon", "wCSSF", "wCASF",
12906 "wC4", "wC5", "wC6", "wC7",
12907 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
12908 "wC12", "wC13", "wC14", "wC15"
12911 fprintf (stream, wc_reg_names [INTVAL (x)]);
12913 return;
12915 /* Print a VFP/Neon double precision or quad precision register name. */
12916 case 'P':
12917 case 'q':
12919 int mode = GET_MODE (x);
12920 int is_quad = (code == 'q');
12921 int regno;
12923 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
12925 output_operand_lossage ("invalid operand for code '%c'", code);
12926 return;
12929 if (GET_CODE (x) != REG
12930 || !IS_VFP_REGNUM (REGNO (x)))
12932 output_operand_lossage ("invalid operand for code '%c'", code);
12933 return;
12936 regno = REGNO (x);
12937 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
12938 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
12940 output_operand_lossage ("invalid operand for code '%c'", code);
12941 return;
12944 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
12945 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
12947 return;
12949 /* These two codes print the low/high doubleword register of a Neon quad
12950 register, respectively. For pair-structure types, they can also print
12951 low/high quadword registers. */
12952 case 'e':
12953 case 'f':
12955 int mode = GET_MODE (x);
12956 int regno;
12958 if ((GET_MODE_SIZE (mode) != 16
12959 && GET_MODE_SIZE (mode) != 32) || GET_CODE (x) != REG)
12961 output_operand_lossage ("invalid operand for code '%c'", code);
12962 return;
12965 regno = REGNO (x);
12966 if (!NEON_REGNO_OK_FOR_QUAD (regno))
12968 output_operand_lossage ("invalid operand for code '%c'", code);
12969 return;
12972 if (GET_MODE_SIZE (mode) == 16)
12973 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
12974 + (code == 'f' ? 1 : 0));
12975 else
12976 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
12977 + (code == 'f' ? 1 : 0));
12979 return;
12981 /* Print a VFPv3 floating-point constant, represented as an integer
12982 index. */
12983 case 'G':
12985 int index = vfp3_const_double_index (x);
12986 gcc_assert (index != -1);
12987 fprintf (stream, "%d", index);
12989 return;
12991 /* Print bits representing opcode features for Neon.
12993 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
12994 and polynomials as unsigned.
12996 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
12998 Bit 2 is 1 for rounding functions, 0 otherwise. */
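/* For example, an operand value of 3 (binary 011) prints 'f' for both %T
   and %F and nothing for %O, while a value of 5 (binary 101) prints 's'
   for %T and "r" for %O.  */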
13000 /* Identify the type as 's', 'u', 'p' or 'f'. */
13001 case 'T':
13003 HOST_WIDE_INT bits = INTVAL (x);
13004 fputc ("uspf"[bits & 3], stream);
13006 return;
13008 /* Likewise, but signed and unsigned integers are both 'i'. */
13009 case 'F':
13011 HOST_WIDE_INT bits = INTVAL (x);
13012 fputc ("iipf"[bits & 3], stream);
13014 return;
13016 /* As for 'T', but emit 'u' instead of 'p'. */
13017 case 't':
13019 HOST_WIDE_INT bits = INTVAL (x);
13020 fputc ("usuf"[bits & 3], stream);
13022 return;
13024 /* Bit 2: rounding (vs none). */
13025 case 'O':
13027 HOST_WIDE_INT bits = INTVAL (x);
13028 fputs ((bits & 4) != 0 ? "r" : "", stream);
13030 return;
13032 default:
13033 if (x == 0)
13035 output_operand_lossage ("missing operand");
13036 return;
13039 switch (GET_CODE (x))
13041 case REG:
13042 asm_fprintf (stream, "%r", REGNO (x));
13043 break;
13045 case MEM:
13046 output_memory_reference_mode = GET_MODE (x);
13047 output_address (XEXP (x, 0));
13048 break;
13050 case CONST_DOUBLE:
13051 if (TARGET_NEON)
13053 char fpstr[20];
13054 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
13055 sizeof (fpstr), 0, 1);
13056 fprintf (stream, "#%s", fpstr);
13058 else
13059 fprintf (stream, "#%s", fp_immediate_constant (x));
13060 break;
13062 default:
13063 gcc_assert (GET_CODE (x) != NEG);
13064 fputc ('#', stream);
13065 output_addr_const (stream, x);
13066 break;
13071 /* Target hook for assembling integer objects. The ARM version needs to
13072 handle word-sized values specially. */
13073 static bool
13074 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
13076 enum machine_mode mode;
13078 if (size == UNITS_PER_WORD && aligned_p)
13080 fputs ("\t.word\t", asm_out_file);
13081 output_addr_const (asm_out_file, x);
13083 /* Mark symbols as position independent. We only do this in the
13084 .text segment, not in the .data segment. */
13085 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
13086 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
13088 /* See legitimize_pic_address for an explanation of the
13089 TARGET_VXWORKS_RTP check. */
13090 if (TARGET_VXWORKS_RTP
13091 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
13092 fputs ("(GOT)", asm_out_file);
13093 else
13094 fputs ("(GOTOFF)", asm_out_file);
13096 fputc ('\n', asm_out_file);
13097 return true;
13100 mode = GET_MODE (x);
13102 if (arm_vector_mode_supported_p (mode))
13104 int i, units;
13105 unsigned int invmask = 0, parts_per_word;
13107 gcc_assert (GET_CODE (x) == CONST_VECTOR);
13109 units = CONST_VECTOR_NUNITS (x);
13110 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
13112 /* For big-endian Neon vectors, we must permute the vector to the form
13113 which, when loaded by a VLDR or VLDM instruction, will give a vector
13114 with the elements in the right order. */
13115 if (TARGET_NEON && WORDS_BIG_ENDIAN)
13117 parts_per_word = UNITS_PER_WORD / size;
13118 /* FIXME: This might be wrong for 64-bit vector elements, but we don't
13119 support those anywhere yet. */
13120 invmask = (parts_per_word == 0) ? 0 : (1 << (parts_per_word - 1)) - 1;
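/* For example, a V4HImode constant has parts_per_word == 2 and
   invmask == 1, so its elements are emitted in the order 1, 0, 3, 2.  */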
13123 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13124 for (i = 0; i < units; i++)
13126 rtx elt = CONST_VECTOR_ELT (x, i ^ invmask);
13127 assemble_integer
13128 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
13130 else
13131 for (i = 0; i < units; i++)
13133 rtx elt = CONST_VECTOR_ELT (x, i);
13134 REAL_VALUE_TYPE rval;
13136 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
13138 assemble_real
13139 (rval, GET_MODE_INNER (mode),
13140 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
13143 return true;
13146 return default_assemble_integer (x, size, aligned_p);
13149 static void
13150 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
13152 section *s;
13154 if (!TARGET_AAPCS_BASED)
13156 (is_ctor ?
13157 default_named_section_asm_out_constructor
13158 : default_named_section_asm_out_destructor) (symbol, priority);
13159 return;
13162 /* Put these in the .init_array section, using a special relocation. */
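/* For example, a constructor with priority 65 is placed in a section
   named ".init_array.00065".  */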
13163 if (priority != DEFAULT_INIT_PRIORITY)
13165 char buf[18];
13166 sprintf (buf, "%s.%.5u",
13167 is_ctor ? ".init_array" : ".fini_array",
13168 priority);
13169 s = get_section (buf, SECTION_WRITE, NULL_TREE);
13171 else if (is_ctor)
13172 s = ctors_section;
13173 else
13174 s = dtors_section;
13176 switch_to_section (s);
13177 assemble_align (POINTER_SIZE);
13178 fputs ("\t.word\t", asm_out_file);
13179 output_addr_const (asm_out_file, symbol);
13180 fputs ("(target1)\n", asm_out_file);
13183 /* Add a function to the list of static constructors. */
13185 static void
13186 arm_elf_asm_constructor (rtx symbol, int priority)
13188 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
13191 /* Add a function to the list of static destructors. */
13193 static void
13194 arm_elf_asm_destructor (rtx symbol, int priority)
13196 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
13199 /* A finite state machine takes care of noticing whether or not instructions
13200 can be conditionally executed, thus decreasing execution time and code
13201 size by deleting branch instructions. The fsm is controlled by
13202 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
13204 /* The states of the fsm controlling condition codes are:
13205 0: normal, do nothing special
13206 1: make ASM_OUTPUT_OPCODE not output this instruction
13207 2: make ASM_OUTPUT_OPCODE not output this instruction
13208 3: make instructions conditional
13209 4: make instructions conditional
13211 State transitions (state->state by whom under condition):
13212 0 -> 1 final_prescan_insn if the `target' is a label
13213 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
13214 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
13215 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
13216 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
13217 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
13218 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
13219 (the target insn is arm_target_insn).
13221 If the jump clobbers the conditions then we use states 2 and 4.
13223 A similar thing can be done with conditional return insns.
13225 XXX In case the `target' is an unconditional branch, this conditionalising
13226 of the instructions always reduces code size, but not always execution
13227 time. But then, I want to reduce the code size to somewhere near what
13228 /bin/cc produces. */
13230 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
13231 instructions. When a COND_EXEC instruction is seen the subsequent
13232 instructions are scanned so that multiple conditional instructions can be
13233 combined into a single IT block. arm_condexec_count and arm_condexec_mask
13234 specify the length and true/false mask for the IT block. These will be
13235 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
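/* As an illustration, a sequence such as

        beq     .L1
        add     r0, r0, #1
     .L1:

   can instead be emitted as the single conditional instruction
   "addne r0, r0, #1"; on Thumb-2 the same effect is obtained by emitting
   "it ne" before the add.  */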
13237 /* Returns the index of the ARM condition code string in
13238 `arm_condition_codes'. COMPARISON should be an rtx like
13239 `(eq (...) (...))'. */
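/* For example, an EQ comparison in CCmode yields ARM_EQ, while a GT
   comparison in CC_SWPmode yields ARM_LT, because the operands of the
   original comparison were swapped.  */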
13240 static enum arm_cond_code
13241 get_arm_condition_code (rtx comparison)
13243 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
13244 int code;
13245 enum rtx_code comp_code = GET_CODE (comparison);
13247 if (GET_MODE_CLASS (mode) != MODE_CC)
13248 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
13249 XEXP (comparison, 1));
13251 switch (mode)
13253 case CC_DNEmode: code = ARM_NE; goto dominance;
13254 case CC_DEQmode: code = ARM_EQ; goto dominance;
13255 case CC_DGEmode: code = ARM_GE; goto dominance;
13256 case CC_DGTmode: code = ARM_GT; goto dominance;
13257 case CC_DLEmode: code = ARM_LE; goto dominance;
13258 case CC_DLTmode: code = ARM_LT; goto dominance;
13259 case CC_DGEUmode: code = ARM_CS; goto dominance;
13260 case CC_DGTUmode: code = ARM_HI; goto dominance;
13261 case CC_DLEUmode: code = ARM_LS; goto dominance;
13262 case CC_DLTUmode: code = ARM_CC;
13264 dominance:
13265 gcc_assert (comp_code == EQ || comp_code == NE);
13267 if (comp_code == EQ)
13268 return ARM_INVERSE_CONDITION_CODE (code);
13269 return code;
13271 case CC_NOOVmode:
13272 switch (comp_code)
13274 case NE: return ARM_NE;
13275 case EQ: return ARM_EQ;
13276 case GE: return ARM_PL;
13277 case LT: return ARM_MI;
13278 default: gcc_unreachable ();
13281 case CC_Zmode:
13282 switch (comp_code)
13284 case NE: return ARM_NE;
13285 case EQ: return ARM_EQ;
13286 default: gcc_unreachable ();
13289 case CC_Nmode:
13290 switch (comp_code)
13292 case NE: return ARM_MI;
13293 case EQ: return ARM_PL;
13294 default: gcc_unreachable ();
13297 case CCFPEmode:
13298 case CCFPmode:
13299 /* These encodings assume that AC=1 in the FPA system control
13300 byte. This allows us to handle all cases except UNEQ and
13301 LTGT. */
13302 switch (comp_code)
13304 case GE: return ARM_GE;
13305 case GT: return ARM_GT;
13306 case LE: return ARM_LS;
13307 case LT: return ARM_MI;
13308 case NE: return ARM_NE;
13309 case EQ: return ARM_EQ;
13310 case ORDERED: return ARM_VC;
13311 case UNORDERED: return ARM_VS;
13312 case UNLT: return ARM_LT;
13313 case UNLE: return ARM_LE;
13314 case UNGT: return ARM_HI;
13315 case UNGE: return ARM_PL;
13316 /* UNEQ and LTGT do not have a representation. */
13317 case UNEQ: /* Fall through. */
13318 case LTGT: /* Fall through. */
13319 default: gcc_unreachable ();
13322 case CC_SWPmode:
13323 switch (comp_code)
13325 case NE: return ARM_NE;
13326 case EQ: return ARM_EQ;
13327 case GE: return ARM_LE;
13328 case GT: return ARM_LT;
13329 case LE: return ARM_GE;
13330 case LT: return ARM_GT;
13331 case GEU: return ARM_LS;
13332 case GTU: return ARM_CC;
13333 case LEU: return ARM_CS;
13334 case LTU: return ARM_HI;
13335 default: gcc_unreachable ();
13338 case CC_Cmode:
13339 switch (comp_code)
13341 case LTU: return ARM_CS;
13342 case GEU: return ARM_CC;
13343 default: gcc_unreachable ();
13346 case CCmode:
13347 switch (comp_code)
13349 case NE: return ARM_NE;
13350 case EQ: return ARM_EQ;
13351 case GE: return ARM_GE;
13352 case GT: return ARM_GT;
13353 case LE: return ARM_LE;
13354 case LT: return ARM_LT;
13355 case GEU: return ARM_CS;
13356 case GTU: return ARM_HI;
13357 case LEU: return ARM_LS;
13358 case LTU: return ARM_CC;
13359 default: gcc_unreachable ();
13362 default: gcc_unreachable ();
13366 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
13367 instructions. */
13368 void
13369 thumb2_final_prescan_insn (rtx insn)
13371 rtx first_insn = insn;
13372 rtx body = PATTERN (insn);
13373 rtx predicate;
13374 enum arm_cond_code code;
13375 int n;
13376 int mask;
13378 /* Remove the previous insn from the count of insns to be output. */
13379 if (arm_condexec_count)
13380 arm_condexec_count--;
13382 /* Nothing to do if we are already inside a conditional block. */
13383 if (arm_condexec_count)
13384 return;
13386 if (GET_CODE (body) != COND_EXEC)
13387 return;
13389 /* Conditional jumps are implemented directly. */
13390 if (GET_CODE (insn) == JUMP_INSN)
13391 return;
13393 predicate = COND_EXEC_TEST (body);
13394 arm_current_cc = get_arm_condition_code (predicate);
13396 n = get_attr_ce_count (insn);
13397 arm_condexec_count = 1;
13398 arm_condexec_mask = (1 << n) - 1;
13399 arm_condexec_masklen = n;
13400 /* See if subsequent instructions can be combined into the same block. */
13401 for (;;)
13403 insn = next_nonnote_insn (insn);
13405 /* Jumping into the middle of an IT block is illegal, so a label or
13406 barrier terminates the block. */
13407 if (GET_CODE (insn) != INSN && GET_CODE(insn) != JUMP_INSN)
13408 break;
13410 body = PATTERN (insn);
13411 /* USE and CLOBBER aren't really insns, so just skip them. */
13412 if (GET_CODE (body) == USE
13413 || GET_CODE (body) == CLOBBER)
13414 continue;
13416 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
13417 if (GET_CODE (body) != COND_EXEC)
13418 break;
13419 /* Allow up to 4 conditionally executed instructions in a block. */
13420 n = get_attr_ce_count (insn);
13421 if (arm_condexec_masklen + n > 4)
13422 break;
13424 predicate = COND_EXEC_TEST (body);
13425 code = get_arm_condition_code (predicate);
13426 mask = (1 << n) - 1;
13427 if (arm_current_cc == code)
13428 arm_condexec_mask |= (mask << arm_condexec_masklen);
13429 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
13430 break;
13432 arm_condexec_count++;
13433 arm_condexec_masklen += n;
13435 /* A jump must be the last instruction in a conditional block. */
13436 if (GET_CODE(insn) == JUMP_INSN)
13437 break;
13439 /* Restore recog_data (getting the attributes of other insns can
13440 destroy this array, but final.c assumes that it remains intact
13441 across this call). */
13442 extract_constrain_insn_cached (first_insn);
13445 void
13446 arm_final_prescan_insn (rtx insn)
13448 /* BODY will hold the body of INSN. */
13449 rtx body = PATTERN (insn);
13451 /* This will be 1 if trying to repeat the trick, and things need to be
13452 reversed if it appears to fail. */
13453 int reverse = 0;
13455 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when the branch is
13456 taken, even if the rtl suggests otherwise. It also
13457 means that we have to grub around within the jump expression to find
13458 out what the conditions are when the jump isn't taken. */
13459 int jump_clobbers = 0;
13461 /* If we start with a return insn, we only succeed if we find another one. */
13462 int seeking_return = 0;
13464 /* START_INSN will hold the insn from where we start looking. This is the
13465 first insn after the following code_label if REVERSE is true. */
13466 rtx start_insn = insn;
13468 /* If in state 4, check if the target branch is reached, in order to
13469 change back to state 0. */
13470 if (arm_ccfsm_state == 4)
13472 if (insn == arm_target_insn)
13474 arm_target_insn = NULL;
13475 arm_ccfsm_state = 0;
13477 return;
13480 /* If in state 3, it is possible to repeat the trick, if this insn is an
13481 unconditional branch to a label, and immediately following this branch
13482 is the previous target label which is only used once, and the label this
13483 branch jumps to is not too far off. */
13484 if (arm_ccfsm_state == 3)
13486 if (simplejump_p (insn))
13488 start_insn = next_nonnote_insn (start_insn);
13489 if (GET_CODE (start_insn) == BARRIER)
13491 /* XXX Isn't this always a barrier? */
13492 start_insn = next_nonnote_insn (start_insn);
13494 if (GET_CODE (start_insn) == CODE_LABEL
13495 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13496 && LABEL_NUSES (start_insn) == 1)
13497 reverse = TRUE;
13498 else
13499 return;
13501 else if (GET_CODE (body) == RETURN)
13503 start_insn = next_nonnote_insn (start_insn);
13504 if (GET_CODE (start_insn) == BARRIER)
13505 start_insn = next_nonnote_insn (start_insn);
13506 if (GET_CODE (start_insn) == CODE_LABEL
13507 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
13508 && LABEL_NUSES (start_insn) == 1)
13510 reverse = TRUE;
13511 seeking_return = 1;
13513 else
13514 return;
13516 else
13517 return;
13520 gcc_assert (!arm_ccfsm_state || reverse);
13521 if (GET_CODE (insn) != JUMP_INSN)
13522 return;
13524 /* This jump might be paralleled with a clobber of the condition codes;
13525 the jump should always come first. */
13526 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
13527 body = XVECEXP (body, 0, 0);
13529 if (reverse
13530 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
13531 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
13533 int insns_skipped;
13534 int fail = FALSE, succeed = FALSE;
13535 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
13536 int then_not_else = TRUE;
13537 rtx this_insn = start_insn, label = 0;
13539 /* If the jump cannot be done with one instruction, we cannot
13540 conditionally execute the instruction in the inverse case. */
13541 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
13543 jump_clobbers = 1;
13544 return;
13547 /* Register the insn jumped to. */
13548 if (reverse)
13550 if (!seeking_return)
13551 label = XEXP (SET_SRC (body), 0);
13553 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
13554 label = XEXP (XEXP (SET_SRC (body), 1), 0);
13555 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
13557 label = XEXP (XEXP (SET_SRC (body), 2), 0);
13558 then_not_else = FALSE;
13560 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
13561 seeking_return = 1;
13562 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
13564 seeking_return = 1;
13565 then_not_else = FALSE;
13567 else
13568 gcc_unreachable ();
13570 /* See how many insns this branch skips, and what kind of insns. If all
13571 insns are okay, and the label or unconditional branch to the same
13572 label is not too far away, succeed. */
13573 for (insns_skipped = 0;
13574 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
13576 rtx scanbody;
13578 this_insn = next_nonnote_insn (this_insn);
13579 if (!this_insn)
13580 break;
13582 switch (GET_CODE (this_insn))
13584 case CODE_LABEL:
13585 /* Succeed if it is the target label, otherwise fail since
13586 control falls in from somewhere else. */
13587 if (this_insn == label)
13589 if (jump_clobbers)
13591 arm_ccfsm_state = 2;
13592 this_insn = next_nonnote_insn (this_insn);
13594 else
13595 arm_ccfsm_state = 1;
13596 succeed = TRUE;
13598 else
13599 fail = TRUE;
13600 break;
13602 case BARRIER:
13603 /* Succeed if the following insn is the target label.
13604 Otherwise fail.
13605 If return insns are used then the last insn in a function
13606 will be a barrier. */
13607 this_insn = next_nonnote_insn (this_insn);
13608 if (this_insn && this_insn == label)
13610 if (jump_clobbers)
13612 arm_ccfsm_state = 2;
13613 this_insn = next_nonnote_insn (this_insn);
13615 else
13616 arm_ccfsm_state = 1;
13617 succeed = TRUE;
13619 else
13620 fail = TRUE;
13621 break;
13623 case CALL_INSN:
13624 /* The AAPCS says that conditional calls should not be
13625 used since they make interworking inefficient (the
13626 linker can't transform BL<cond> into BLX). That's
13627 only a problem if the machine has BLX. */
13628 if (arm_arch5)
13630 fail = TRUE;
13631 break;
13634 /* Succeed if the following insn is the target label, or
13635 if the following two insns are a barrier and the
13636 target label. */
13637 this_insn = next_nonnote_insn (this_insn);
13638 if (this_insn && GET_CODE (this_insn) == BARRIER)
13639 this_insn = next_nonnote_insn (this_insn);
13641 if (this_insn && this_insn == label
13642 && insns_skipped < max_insns_skipped)
13644 if (jump_clobbers)
13646 arm_ccfsm_state = 2;
13647 this_insn = next_nonnote_insn (this_insn);
13649 else
13650 arm_ccfsm_state = 1;
13651 succeed = TRUE;
13653 else
13654 fail = TRUE;
13655 break;
13657 case JUMP_INSN:
13658 /* If this is an unconditional branch to the same label, succeed.
13659 If it is to another label, do nothing. If it is conditional,
13660 fail. */
13661 /* XXX Probably, the tests for SET and the PC are
13662 unnecessary. */
13664 scanbody = PATTERN (this_insn);
13665 if (GET_CODE (scanbody) == SET
13666 && GET_CODE (SET_DEST (scanbody)) == PC)
13668 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
13669 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
13671 arm_ccfsm_state = 2;
13672 succeed = TRUE;
13674 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
13675 fail = TRUE;
13677 /* Fail if a conditional return is undesirable (e.g. on a
13678 StrongARM), but still allow this if optimizing for size. */
13679 else if (GET_CODE (scanbody) == RETURN
13680 && !use_return_insn (TRUE, NULL)
13681 && !optimize_size)
13682 fail = TRUE;
13683 else if (GET_CODE (scanbody) == RETURN
13684 && seeking_return)
13686 arm_ccfsm_state = 2;
13687 succeed = TRUE;
13689 else if (GET_CODE (scanbody) == PARALLEL)
13691 switch (get_attr_conds (this_insn))
13693 case CONDS_NOCOND:
13694 break;
13695 default:
13696 fail = TRUE;
13697 break;
13700 else
13701 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
13703 break;
13705 case INSN:
13706 /* Instructions using or affecting the condition codes make it
13707 fail. */
13708 scanbody = PATTERN (this_insn);
13709 if (!(GET_CODE (scanbody) == SET
13710 || GET_CODE (scanbody) == PARALLEL)
13711 || get_attr_conds (this_insn) != CONDS_NOCOND)
13712 fail = TRUE;
13714 /* A conditional Cirrus instruction must be followed by
13715 a non-Cirrus instruction. However, since we
13716 conditionalize instructions in this function, and since
13717 by the time we get here we can no longer add instructions
13718 (nops) because shorten_branches() has already been
13719 called, we disable conditionalizing Cirrus
13720 instructions to be safe. */
13721 if (GET_CODE (scanbody) != USE
13722 && GET_CODE (scanbody) != CLOBBER
13723 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
13724 fail = TRUE;
13725 break;
13727 default:
13728 break;
13731 if (succeed)
13733 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
13734 arm_target_label = CODE_LABEL_NUMBER (label);
13735 else
13737 gcc_assert (seeking_return || arm_ccfsm_state == 2);
13739 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
13741 this_insn = next_nonnote_insn (this_insn);
13742 gcc_assert (!this_insn
13743 || (GET_CODE (this_insn) != BARRIER
13744 && GET_CODE (this_insn) != CODE_LABEL));
13746 if (!this_insn)
13748 /* Oh, dear! We ran off the end; give up. */
13749 extract_constrain_insn_cached (insn);
13750 arm_ccfsm_state = 0;
13751 arm_target_insn = NULL;
13752 return;
13754 arm_target_insn = this_insn;
13756 if (jump_clobbers)
13758 gcc_assert (!reverse);
13759 arm_current_cc =
13760 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
13761 0), 0), 1));
13762 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
13763 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13764 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
13765 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13767 else
13769 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
13770 what it was. */
13771 if (!reverse)
13772 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
13773 0));
13776 if (reverse || then_not_else)
13777 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
13780 /* Restore recog_data (getting the attributes of other insns can
13781 destroy this array, but final.c assumes that it remains intact
13782 across this call). */
13783 extract_constrain_insn_cached (insn);
13787 /* Output IT instructions. */
13788 void
13789 thumb2_asm_output_opcode (FILE * stream)
13791 char buff[5];
13792 int n;
13794 if (arm_condexec_mask)
13796 for (n = 0; n < arm_condexec_masklen; n++)
13797 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
13798 buff[n] = 0;
13799 asm_fprintf(stream, "i%s\t%s\n\t", buff,
13800 arm_condition_codes[arm_current_cc]);
13801 arm_condexec_mask = 0;
13805 /* Returns true if REGNO is a valid register
13806 for holding a quantity of type MODE. */
13808 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
13810 if (GET_MODE_CLASS (mode) == MODE_CC)
13811 return (regno == CC_REGNUM
13812 || (TARGET_HARD_FLOAT && TARGET_VFP
13813 && regno == VFPCC_REGNUM));
13815 if (TARGET_THUMB1)
13816 /* For the Thumb we only allow values bigger than SImode in
13817 registers 0 - 6, so that there is always a second low
13818 register available to hold the upper part of the value.
13819 We probably ought to ensure that the register is the
13820 start of an even numbered register pair. */
13821 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
13823 if (TARGET_HARD_FLOAT && TARGET_MAVERICK
13824 && IS_CIRRUS_REGNUM (regno))
13825 /* We have outlawed SI values in Cirrus registers because they
13826 reside in the lower 32 bits, but SF values reside in the
13827 upper 32 bits. This causes gcc all sorts of grief. We can't
13828 even split the registers into pairs because Cirrus SI values
13829 get sign extended to 64bits-- aldyh. */
13830 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
13832 if (TARGET_HARD_FLOAT && TARGET_VFP
13833 && IS_VFP_REGNUM (regno))
13835 if (mode == SFmode || mode == SImode)
13836 return VFP_REGNO_OK_FOR_SINGLE (regno);
13838 if (mode == DFmode)
13839 return VFP_REGNO_OK_FOR_DOUBLE (regno);
13841 if (TARGET_NEON)
13842 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
13843 || (VALID_NEON_QREG_MODE (mode)
13844 && NEON_REGNO_OK_FOR_QUAD (regno))
13845 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
13846 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
13847 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
13848 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
13849 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
13851 return FALSE;
13854 if (TARGET_REALLY_IWMMXT)
13856 if (IS_IWMMXT_GR_REGNUM (regno))
13857 return mode == SImode;
13859 if (IS_IWMMXT_REGNUM (regno))
13860 return VALID_IWMMXT_REG_MODE (mode);
13863 /* We allow any value to be stored in the general registers.
13864 Restrict doubleword quantities to even register pairs so that we can
13865 use ldrd. Do not allow Neon structure opaque modes in general registers;
13866 they would use too many. */
13867 if (regno <= LAST_ARM_REGNUM)
13868 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0)
13869 && !VALID_NEON_STRUCT_MODE (mode);
13871 if (regno == FRAME_POINTER_REGNUM
13872 || regno == ARG_POINTER_REGNUM)
13873 /* We only allow integers in the fake hard registers. */
13874 return GET_MODE_CLASS (mode) == MODE_INT;
13876 /* The only registers left are the FPA registers
13877 which we only allow to hold FP values. */
13878 return (TARGET_HARD_FLOAT && TARGET_FPA
13879 && GET_MODE_CLASS (mode) == MODE_FLOAT
13880 && regno >= FIRST_FPA_REGNUM
13881 && regno <= LAST_FPA_REGNUM);
13884 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
13885 not used in arm mode. */
13887 arm_regno_class (int regno)
13889 if (TARGET_THUMB1)
13891 if (regno == STACK_POINTER_REGNUM)
13892 return STACK_REG;
13893 if (regno == CC_REGNUM)
13894 return CC_REG;
13895 if (regno < 8)
13896 return LO_REGS;
13897 return HI_REGS;
13900 if (TARGET_THUMB2 && regno < 8)
13901 return LO_REGS;
13903 if ( regno <= LAST_ARM_REGNUM
13904 || regno == FRAME_POINTER_REGNUM
13905 || regno == ARG_POINTER_REGNUM)
13906 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
13908 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
13909 return TARGET_THUMB2 ? CC_REG : NO_REGS;
13911 if (IS_CIRRUS_REGNUM (regno))
13912 return CIRRUS_REGS;
13914 if (IS_VFP_REGNUM (regno))
13916 if (regno <= D7_VFP_REGNUM)
13917 return VFP_D0_D7_REGS;
13918 else if (regno <= LAST_LO_VFP_REGNUM)
13919 return VFP_LO_REGS;
13920 else
13921 return VFP_HI_REGS;
13924 if (IS_IWMMXT_REGNUM (regno))
13925 return IWMMXT_REGS;
13927 if (IS_IWMMXT_GR_REGNUM (regno))
13928 return IWMMXT_GR_REGS;
13930 return FPA_REGS;
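/* Added illustration (not part of the original source): for example, r3 is
   classed as LO_REGS under Thumb-1 or Thumb-2 but GENERAL_REGS in ARM state,
   while r12 is HI_REGS under either Thumb variant and GENERAL_REGS in ARM
   state; anything matching none of the tests above falls through to
   FPA_REGS.  */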
13933 /* Handle a special case when computing the offset
13934 of an argument from the frame pointer. */
13936 arm_debugger_arg_offset (int value, rtx addr)
13938 rtx insn;
13940 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
13941 if (value != 0)
13942 return 0;
13944 /* We can only cope with the case where the address is held in a register. */
13945 if (GET_CODE (addr) != REG)
13946 return 0;
13948 /* If we are using the frame pointer to point at the argument, then
13949 an offset of 0 is correct. */
13950 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
13951 return 0;
13953 /* If we are using the stack pointer to point at the
13954 argument, then an offset of 0 is correct. */
13955 /* ??? Check this is consistent with thumb2 frame layout. */
13956 if ((TARGET_THUMB || !frame_pointer_needed)
13957 && REGNO (addr) == SP_REGNUM)
13958 return 0;
13960 /* Oh dear. The argument is pointed to by a register rather
13961 than being held in a register, or being stored at a known
13962 offset from the frame pointer. Since GDB only understands
13963 those two kinds of argument we must translate the address
13964 held in the register into an offset from the frame pointer.
13965 We do this by searching through the insns for the function
13966 looking to see where this register gets its value. If the
13967 register is initialized from the frame pointer plus an offset
13968 then we are in luck and we can continue, otherwise we give up.
13970 This code is exercised by producing debugging information
13971 for a function with arguments like this:
13973 double func (double a, double b, int c, double d) {return d;}
13975 Without this code the stab for parameter 'd' will be set to
13976 an offset of 0 from the frame pointer, rather than 8. */
13978 /* The if() statement says:
13980 If the insn is a normal instruction
13981 and if the insn is setting the value in a register
13982 and if the register being set is the register holding the address of the argument
13983 and if the address is computed by an addition
13984 that involves adding to a register
13985 which is the frame pointer
13986 a constant integer
13988 then... */
13990 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13992 if ( GET_CODE (insn) == INSN
13993 && GET_CODE (PATTERN (insn)) == SET
13994 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
13995 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
13996 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
13997 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
13998 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
14001 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
14003 break;
14007 if (value == 0)
14009 debug_rtx (addr);
14010 warning (0, "unable to compute real location of stacked parameter");
14011 value = 8; /* XXX magic hack */
14014 return value;
14017 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
14018 do \
14020 if ((MASK) & insn_flags) \
14021 add_builtin_function ((NAME), (TYPE), (CODE), \
14022 BUILT_IN_MD, NULL, NULL_TREE); \
14024 while (0)
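/* Added note (not in the original source): def_mbuiltin registers a builtin
   only when the target supports it.  For instance the call
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
   made later in this file creates __builtin_arm_wzero only if FL_IWMMXT is
   set in insn_flags for the selected CPU; otherwise nothing is registered.  */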
14026 struct builtin_description
14028 const unsigned int mask;
14029 const enum insn_code icode;
14030 const char * const name;
14031 const enum arm_builtins code;
14032 const enum rtx_code comparison;
14033 const unsigned int flag;
14036 static const struct builtin_description bdesc_2arg[] =
14038 #define IWMMXT_BUILTIN(code, string, builtin) \
14039 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
14040 ARM_BUILTIN_##builtin, 0, 0 },
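/* Added illustration (not in the original source): each line below is one
   table entry.  For instance
     IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
   expands to
     { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
       ARM_BUILTIN_WADDB, 0, 0 },
   tying the named insn pattern to the user-visible builtin.  */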
14042 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
14043 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
14044 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
14045 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
14046 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
14047 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
14048 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
14049 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
14050 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
14051 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
14052 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
14053 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
14054 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
14055 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
14056 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
14057 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
14058 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
14059 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
14060 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
14061 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
14062 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
14063 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
14064 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
14065 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
14066 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
14067 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
14068 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
14069 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
14070 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
14071 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
14072 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
14073 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
14074 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
14075 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
14076 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
14077 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
14078 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
14079 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
14080 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
14081 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
14082 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
14083 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
14084 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
14085 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
14086 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
14087 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
14088 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
14089 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
14090 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
14091 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
14092 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
14093 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
14094 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
14095 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
14096 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
14097 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
14098 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
14099 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
14101 #define IWMMXT_BUILTIN2(code, builtin) \
14102 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
14104 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
14105 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
14106 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
14107 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
14108 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
14109 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
14110 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
14111 IWMMXT_BUILTIN2 (ashlv4hi3_iwmmxt, WSLLHI)
14112 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
14113 IWMMXT_BUILTIN2 (ashlv2si3_iwmmxt, WSLLWI)
14114 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
14115 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
14116 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
14117 IWMMXT_BUILTIN2 (lshrv4hi3_iwmmxt, WSRLHI)
14118 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
14119 IWMMXT_BUILTIN2 (lshrv2si3_iwmmxt, WSRLWI)
14120 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
14121 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
14122 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
14123 IWMMXT_BUILTIN2 (ashrv4hi3_iwmmxt, WSRAHI)
14124 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
14125 IWMMXT_BUILTIN2 (ashrv2si3_iwmmxt, WSRAWI)
14126 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
14127 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
14128 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
14129 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
14130 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
14131 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
14132 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
14133 IWMMXT_BUILTIN2 (rordi3, WRORDI)
14134 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
14135 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
14138 static const struct builtin_description bdesc_1arg[] =
14140 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
14141 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
14142 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
14143 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
14144 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
14145 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
14146 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
14147 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
14148 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
14149 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
14150 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
14151 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
14152 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
14153 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
14154 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
14155 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
14156 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
14157 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
14160 /* Set up all the iWMMXt builtins. This is
14161 not called if TARGET_IWMMXT is zero. */
14163 static void
14164 arm_init_iwmmxt_builtins (void)
14166 const struct builtin_description * d;
14167 size_t i;
14168 tree endlink = void_list_node;
14170 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14171 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14172 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14174 tree int_ftype_int
14175 = build_function_type (integer_type_node,
14176 tree_cons (NULL_TREE, integer_type_node, endlink));
14177 tree v8qi_ftype_v8qi_v8qi_int
14178 = build_function_type (V8QI_type_node,
14179 tree_cons (NULL_TREE, V8QI_type_node,
14180 tree_cons (NULL_TREE, V8QI_type_node,
14181 tree_cons (NULL_TREE,
14182 integer_type_node,
14183 endlink))));
14184 tree v4hi_ftype_v4hi_int
14185 = build_function_type (V4HI_type_node,
14186 tree_cons (NULL_TREE, V4HI_type_node,
14187 tree_cons (NULL_TREE, integer_type_node,
14188 endlink)));
14189 tree v2si_ftype_v2si_int
14190 = build_function_type (V2SI_type_node,
14191 tree_cons (NULL_TREE, V2SI_type_node,
14192 tree_cons (NULL_TREE, integer_type_node,
14193 endlink)));
14194 tree v2si_ftype_di_di
14195 = build_function_type (V2SI_type_node,
14196 tree_cons (NULL_TREE, long_long_integer_type_node,
14197 tree_cons (NULL_TREE, long_long_integer_type_node,
14198 endlink)));
14199 tree di_ftype_di_int
14200 = build_function_type (long_long_integer_type_node,
14201 tree_cons (NULL_TREE, long_long_integer_type_node,
14202 tree_cons (NULL_TREE, integer_type_node,
14203 endlink)));
14204 tree di_ftype_di_int_int
14205 = build_function_type (long_long_integer_type_node,
14206 tree_cons (NULL_TREE, long_long_integer_type_node,
14207 tree_cons (NULL_TREE, integer_type_node,
14208 tree_cons (NULL_TREE,
14209 integer_type_node,
14210 endlink))));
14211 tree int_ftype_v8qi
14212 = build_function_type (integer_type_node,
14213 tree_cons (NULL_TREE, V8QI_type_node,
14214 endlink));
14215 tree int_ftype_v4hi
14216 = build_function_type (integer_type_node,
14217 tree_cons (NULL_TREE, V4HI_type_node,
14218 endlink));
14219 tree int_ftype_v2si
14220 = build_function_type (integer_type_node,
14221 tree_cons (NULL_TREE, V2SI_type_node,
14222 endlink));
14223 tree int_ftype_v8qi_int
14224 = build_function_type (integer_type_node,
14225 tree_cons (NULL_TREE, V8QI_type_node,
14226 tree_cons (NULL_TREE, integer_type_node,
14227 endlink)));
14228 tree int_ftype_v4hi_int
14229 = build_function_type (integer_type_node,
14230 tree_cons (NULL_TREE, V4HI_type_node,
14231 tree_cons (NULL_TREE, integer_type_node,
14232 endlink)));
14233 tree int_ftype_v2si_int
14234 = build_function_type (integer_type_node,
14235 tree_cons (NULL_TREE, V2SI_type_node,
14236 tree_cons (NULL_TREE, integer_type_node,
14237 endlink)));
14238 tree v8qi_ftype_v8qi_int_int
14239 = build_function_type (V8QI_type_node,
14240 tree_cons (NULL_TREE, V8QI_type_node,
14241 tree_cons (NULL_TREE, integer_type_node,
14242 tree_cons (NULL_TREE,
14243 integer_type_node,
14244 endlink))));
14245 tree v4hi_ftype_v4hi_int_int
14246 = build_function_type (V4HI_type_node,
14247 tree_cons (NULL_TREE, V4HI_type_node,
14248 tree_cons (NULL_TREE, integer_type_node,
14249 tree_cons (NULL_TREE,
14250 integer_type_node,
14251 endlink))));
14252 tree v2si_ftype_v2si_int_int
14253 = build_function_type (V2SI_type_node,
14254 tree_cons (NULL_TREE, V2SI_type_node,
14255 tree_cons (NULL_TREE, integer_type_node,
14256 tree_cons (NULL_TREE,
14257 integer_type_node,
14258 endlink))));
14259 /* Miscellaneous. */
14260 tree v8qi_ftype_v4hi_v4hi
14261 = build_function_type (V8QI_type_node,
14262 tree_cons (NULL_TREE, V4HI_type_node,
14263 tree_cons (NULL_TREE, V4HI_type_node,
14264 endlink)));
14265 tree v4hi_ftype_v2si_v2si
14266 = build_function_type (V4HI_type_node,
14267 tree_cons (NULL_TREE, V2SI_type_node,
14268 tree_cons (NULL_TREE, V2SI_type_node,
14269 endlink)));
14270 tree v2si_ftype_v4hi_v4hi
14271 = build_function_type (V2SI_type_node,
14272 tree_cons (NULL_TREE, V4HI_type_node,
14273 tree_cons (NULL_TREE, V4HI_type_node,
14274 endlink)));
14275 tree v2si_ftype_v8qi_v8qi
14276 = build_function_type (V2SI_type_node,
14277 tree_cons (NULL_TREE, V8QI_type_node,
14278 tree_cons (NULL_TREE, V8QI_type_node,
14279 endlink)));
14280 tree v4hi_ftype_v4hi_di
14281 = build_function_type (V4HI_type_node,
14282 tree_cons (NULL_TREE, V4HI_type_node,
14283 tree_cons (NULL_TREE,
14284 long_long_integer_type_node,
14285 endlink)));
14286 tree v2si_ftype_v2si_di
14287 = build_function_type (V2SI_type_node,
14288 tree_cons (NULL_TREE, V2SI_type_node,
14289 tree_cons (NULL_TREE,
14290 long_long_integer_type_node,
14291 endlink)));
14292 tree void_ftype_int_int
14293 = build_function_type (void_type_node,
14294 tree_cons (NULL_TREE, integer_type_node,
14295 tree_cons (NULL_TREE, integer_type_node,
14296 endlink)));
14297 tree di_ftype_void
14298 = build_function_type (long_long_unsigned_type_node, endlink);
14299 tree di_ftype_v8qi
14300 = build_function_type (long_long_integer_type_node,
14301 tree_cons (NULL_TREE, V8QI_type_node,
14302 endlink));
14303 tree di_ftype_v4hi
14304 = build_function_type (long_long_integer_type_node,
14305 tree_cons (NULL_TREE, V4HI_type_node,
14306 endlink));
14307 tree di_ftype_v2si
14308 = build_function_type (long_long_integer_type_node,
14309 tree_cons (NULL_TREE, V2SI_type_node,
14310 endlink));
14311 tree v2si_ftype_v4hi
14312 = build_function_type (V2SI_type_node,
14313 tree_cons (NULL_TREE, V4HI_type_node,
14314 endlink));
14315 tree v4hi_ftype_v8qi
14316 = build_function_type (V4HI_type_node,
14317 tree_cons (NULL_TREE, V8QI_type_node,
14318 endlink));
14320 tree di_ftype_di_v4hi_v4hi
14321 = build_function_type (long_long_unsigned_type_node,
14322 tree_cons (NULL_TREE,
14323 long_long_unsigned_type_node,
14324 tree_cons (NULL_TREE, V4HI_type_node,
14325 tree_cons (NULL_TREE,
14326 V4HI_type_node,
14327 endlink))));
14329 tree di_ftype_v4hi_v4hi
14330 = build_function_type (long_long_unsigned_type_node,
14331 tree_cons (NULL_TREE, V4HI_type_node,
14332 tree_cons (NULL_TREE, V4HI_type_node,
14333 endlink)));
14335 /* Normal vector binops. */
14336 tree v8qi_ftype_v8qi_v8qi
14337 = build_function_type (V8QI_type_node,
14338 tree_cons (NULL_TREE, V8QI_type_node,
14339 tree_cons (NULL_TREE, V8QI_type_node,
14340 endlink)));
14341 tree v4hi_ftype_v4hi_v4hi
14342 = build_function_type (V4HI_type_node,
14343 tree_cons (NULL_TREE, V4HI_type_node,
14344 tree_cons (NULL_TREE, V4HI_type_node,
14345 endlink)));
14346 tree v2si_ftype_v2si_v2si
14347 = build_function_type (V2SI_type_node,
14348 tree_cons (NULL_TREE, V2SI_type_node,
14349 tree_cons (NULL_TREE, V2SI_type_node,
14350 endlink)));
14351 tree di_ftype_di_di
14352 = build_function_type (long_long_unsigned_type_node,
14353 tree_cons (NULL_TREE, long_long_unsigned_type_node,
14354 tree_cons (NULL_TREE,
14355 long_long_unsigned_type_node,
14356 endlink)));
14358 /* Add all builtins that are more or less simple operations on two
14359 operands. */
14360 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14362 /* Use one of the operands; the target can have a different mode for
14363 mask-generating compares. */
14364 enum machine_mode mode;
14365 tree type;
14367 if (d->name == 0)
14368 continue;
14370 mode = insn_data[d->icode].operand[1].mode;
14372 switch (mode)
14374 case V8QImode:
14375 type = v8qi_ftype_v8qi_v8qi;
14376 break;
14377 case V4HImode:
14378 type = v4hi_ftype_v4hi_v4hi;
14379 break;
14380 case V2SImode:
14381 type = v2si_ftype_v2si_v2si;
14382 break;
14383 case DImode:
14384 type = di_ftype_di_di;
14385 break;
14387 default:
14388 gcc_unreachable ();
14391 def_mbuiltin (d->mask, d->name, type, d->code);
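  /* Worked example of the loop above (added for illustration, not part of
     the original source): for the entry generated from addv8qi3, operand 1
     of the insn has mode V8QImode, so the switch picks v8qi_ftype_v8qi_v8qi
     and __builtin_arm_waddb is registered with the signature
     V8QI (V8QI, V8QI).  */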
14394 /* Add the remaining MMX insns with somewhat more complicated types. */
14395 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
14396 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
14397 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
14399 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
14400 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
14401 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
14402 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
14403 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
14404 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
14406 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
14407 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
14408 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
14409 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
14410 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
14411 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
14413 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
14414 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
14415 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
14416 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
14417 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
14418 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
14420 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
14421 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
14422 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
14423 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
14424 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
14425 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
14427 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
14429 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
14430 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
14431 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
14432 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
14434 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
14435 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
14436 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
14437 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
14438 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
14439 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
14440 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
14441 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
14442 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
14444 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
14445 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
14446 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
14448 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
14449 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
14450 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
14452 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
14453 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
14454 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
14455 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
14456 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
14457 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
14459 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
14460 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
14461 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
14462 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
14463 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
14464 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
14465 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
14466 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
14467 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
14468 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
14469 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
14470 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
14472 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
14473 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
14474 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
14475 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
14477 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
14478 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
14479 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
14480 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
14481 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
14482 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
14483 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
14486 static void
14487 arm_init_tls_builtins (void)
14489 tree ftype;
14490 tree nothrow = tree_cons (get_identifier ("nothrow"), NULL, NULL);
14491 tree const_nothrow = tree_cons (get_identifier ("const"), NULL, nothrow);
14493 ftype = build_function_type (ptr_type_node, void_list_node);
14494 add_builtin_function ("__builtin_thread_pointer", ftype,
14495 ARM_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
14496 NULL, const_nothrow);
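  /* Added usage note (hypothetical example, not in the original source):
     once registered, user code can write
       void *tp = __builtin_thread_pointer ();
     and the "const" and "nothrow" attributes attached above allow such
     calls to be CSEd and moved freely by the optimizers.  */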
14499 typedef enum {
14500 T_V8QI = 0x0001,
14501 T_V4HI = 0x0002,
14502 T_V2SI = 0x0004,
14503 T_V2SF = 0x0008,
14504 T_DI = 0x0010,
14505 T_V16QI = 0x0020,
14506 T_V8HI = 0x0040,
14507 T_V4SI = 0x0080,
14508 T_V4SF = 0x0100,
14509 T_V2DI = 0x0200,
14510 T_TI = 0x0400,
14511 T_EI = 0x0800,
14512 T_OI = 0x1000
14513 } neon_builtin_type_bits;
14515 #define v8qi_UP T_V8QI
14516 #define v4hi_UP T_V4HI
14517 #define v2si_UP T_V2SI
14518 #define v2sf_UP T_V2SF
14519 #define di_UP T_DI
14520 #define v16qi_UP T_V16QI
14521 #define v8hi_UP T_V8HI
14522 #define v4si_UP T_V4SI
14523 #define v4sf_UP T_V4SF
14524 #define v2di_UP T_V2DI
14525 #define ti_UP T_TI
14526 #define ei_UP T_EI
14527 #define oi_UP T_OI
14529 #define UP(X) X##_UP
14531 #define T_MAX 13
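/* Added note (not in the original source): each T_* bit marks one vector or
   element mode, and the *_UP macros let the VARn macros below OR the bits
   together.  For example UP (v8qi) | UP (v4hi) | UP (v2si) is
   T_V8QI | T_V4HI | T_V2SI == 0x0007, recording that a builtin exists for
   exactly those three modes.  T_MAX (13) is the number of bits defined and
   bounds the codes[] array in neon_builtin_datum.  */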
14533 typedef enum {
14534 NEON_BINOP,
14535 NEON_TERNOP,
14536 NEON_UNOP,
14537 NEON_GETLANE,
14538 NEON_SETLANE,
14539 NEON_CREATE,
14540 NEON_DUP,
14541 NEON_DUPLANE,
14542 NEON_COMBINE,
14543 NEON_SPLIT,
14544 NEON_LANEMUL,
14545 NEON_LANEMULL,
14546 NEON_LANEMULH,
14547 NEON_LANEMAC,
14548 NEON_SCALARMUL,
14549 NEON_SCALARMULL,
14550 NEON_SCALARMULH,
14551 NEON_SCALARMAC,
14552 NEON_CONVERT,
14553 NEON_FIXCONV,
14554 NEON_SELECT,
14555 NEON_RESULTPAIR,
14556 NEON_REINTERP,
14557 NEON_VTBL,
14558 NEON_VTBX,
14559 NEON_LOAD1,
14560 NEON_LOAD1LANE,
14561 NEON_STORE1,
14562 NEON_STORE1LANE,
14563 NEON_LOADSTRUCT,
14564 NEON_LOADSTRUCTLANE,
14565 NEON_STORESTRUCT,
14566 NEON_STORESTRUCTLANE,
14567 NEON_LOGICBINOP,
14568 NEON_SHIFTINSERT,
14569 NEON_SHIFTIMM,
14570 NEON_SHIFTACC
14571 } neon_itype;
14573 typedef struct {
14574 const char *name;
14575 const neon_itype itype;
14576 const neon_builtin_type_bits bits;
14577 const enum insn_code codes[T_MAX];
14578 const unsigned int num_vars;
14579 unsigned int base_fcode;
14580 } neon_builtin_datum;
14582 #define CF(N,X) CODE_FOR_neon_##N##X
14584 #define VAR1(T, N, A) \
14585 #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
14586 #define VAR2(T, N, A, B) \
14587 #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
14588 #define VAR3(T, N, A, B, C) \
14589 #N, NEON_##T, UP (A) | UP (B) | UP (C), \
14590 { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
14591 #define VAR4(T, N, A, B, C, D) \
14592 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
14593 { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
14594 #define VAR5(T, N, A, B, C, D, E) \
14595 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
14596 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
14597 #define VAR6(T, N, A, B, C, D, E, F) \
14598 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
14599 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
14600 #define VAR7(T, N, A, B, C, D, E, F, G) \
14601 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
14602 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14603 CF (N, G) }, 7, 0
14604 #define VAR8(T, N, A, B, C, D, E, F, G, H) \
14605 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14606 | UP (H), \
14607 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14608 CF (N, G), CF (N, H) }, 8, 0
14609 #define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
14610 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14611 | UP (H) | UP (I), \
14612 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14613 CF (N, G), CF (N, H), CF (N, I) }, 9, 0
14614 #define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
14615 #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
14616 | UP (H) | UP (I) | UP (J), \
14617 { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
14618 CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
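/* Example of how these macros are used (added for illustration, not part of
   the original source):
     { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) }
   produces an entry with name "vaddl", itype NEON_BINOP,
   bits == T_V8QI | T_V4HI | T_V2SI, codes == { CODE_FOR_neon_vaddlv8qi,
   CODE_FOR_neon_vaddlv4hi, CODE_FOR_neon_vaddlv2si } and num_vars == 3.
   arm_init_neon_builtins below then registers one builtin per variant,
   named "__builtin_neon_" plus the name and mode suffix, e.g.
   __builtin_neon_vaddlv8qi.  */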
14620 /* The mode entries in the following table correspond to the "key" type of the
14621 instruction variant, i.e. equivalent to that which would be specified after
14622 the assembler mnemonic, which usually refers to the last vector operand.
14623 (Signed, unsigned and polynomial types are not differentiated, though, and
14624 are all mapped onto the same mode for a given element size.) The modes
14625 listed per instruction should be the same as those defined for that
14626 instruction's pattern in neon.md.
14627 WARNING: Variants should be listed in the same increasing order as
14628 neon_builtin_type_bits. */
14630 static neon_builtin_datum neon_builtin_data[] =
14632 { VAR10 (BINOP, vadd,
14633 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14634 { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
14635 { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
14636 { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14637 { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14638 { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
14639 { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14640 { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14641 { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
14642 { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14643 { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
14644 { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
14645 { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
14646 { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
14647 { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
14648 { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
14649 { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
14650 { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
14651 { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
14652 { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
14653 { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
14654 { VAR2 (BINOP, vqdmull, v4hi, v2si) },
14655 { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14656 { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14657 { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14658 { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
14659 { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
14660 { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
14661 { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14662 { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14663 { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14664 { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
14665 { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14666 { VAR10 (BINOP, vsub,
14667 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14668 { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
14669 { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
14670 { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14671 { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14672 { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
14673 { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14674 { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14675 { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14676 { VAR2 (BINOP, vcage, v2sf, v4sf) },
14677 { VAR2 (BINOP, vcagt, v2sf, v4sf) },
14678 { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14679 { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14680 { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
14681 { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14682 { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
14683 { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14684 { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14685 { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
14686 { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14687 { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14688 { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
14689 { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
14690 { VAR2 (BINOP, vrecps, v2sf, v4sf) },
14691 { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
14692 { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14693 { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
14694 { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14695 { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14696 { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14697 { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14698 { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14699 { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14700 { VAR2 (UNOP, vcnt, v8qi, v16qi) },
14701 { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
14702 { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
14703 { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
14704 /* FIXME: vget_lane supports more variants than this! */
14705 { VAR10 (GETLANE, vget_lane,
14706 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14707 { VAR10 (SETLANE, vset_lane,
14708 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14709 { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
14710 { VAR10 (DUP, vdup_n,
14711 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14712 { VAR10 (DUPLANE, vdup_lane,
14713 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14714 { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
14715 { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
14716 { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
14717 { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
14718 { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
14719 { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
14720 { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
14721 { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14722 { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14723 { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
14724 { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
14725 { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14726 { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
14727 { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
14728 { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14729 { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14730 { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
14731 { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
14732 { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14733 { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
14734 { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
14735 { VAR10 (BINOP, vext,
14736 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14737 { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14738 { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
14739 { VAR2 (UNOP, vrev16, v8qi, v16qi) },
14740 { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
14741 { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
14742 { VAR10 (SELECT, vbsl,
14743 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14744 { VAR1 (VTBL, vtbl1, v8qi) },
14745 { VAR1 (VTBL, vtbl2, v8qi) },
14746 { VAR1 (VTBL, vtbl3, v8qi) },
14747 { VAR1 (VTBL, vtbl4, v8qi) },
14748 { VAR1 (VTBX, vtbx1, v8qi) },
14749 { VAR1 (VTBX, vtbx2, v8qi) },
14750 { VAR1 (VTBX, vtbx3, v8qi) },
14751 { VAR1 (VTBX, vtbx4, v8qi) },
14752 { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14753 { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14754 { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
14755 { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
14756 { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
14757 { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
14758 { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
14759 { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
14760 { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
14761 { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
14762 { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
14763 { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
14764 { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
14765 { VAR10 (LOAD1, vld1,
14766 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14767 { VAR10 (LOAD1LANE, vld1_lane,
14768 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14769 { VAR10 (LOAD1, vld1_dup,
14770 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14771 { VAR10 (STORE1, vst1,
14772 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14773 { VAR10 (STORE1LANE, vst1_lane,
14774 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14775 { VAR9 (LOADSTRUCT,
14776 vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14777 { VAR7 (LOADSTRUCTLANE, vld2_lane,
14778 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14779 { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
14780 { VAR9 (STORESTRUCT, vst2,
14781 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14782 { VAR7 (STORESTRUCTLANE, vst2_lane,
14783 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14784 { VAR9 (LOADSTRUCT,
14785 vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14786 { VAR7 (LOADSTRUCTLANE, vld3_lane,
14787 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14788 { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
14789 { VAR9 (STORESTRUCT, vst3,
14790 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14791 { VAR7 (STORESTRUCTLANE, vst3_lane,
14792 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14793 { VAR9 (LOADSTRUCT, vld4,
14794 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14795 { VAR7 (LOADSTRUCTLANE, vld4_lane,
14796 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14797 { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
14798 { VAR9 (STORESTRUCT, vst4,
14799 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
14800 { VAR7 (STORESTRUCTLANE, vst4_lane,
14801 v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
14802 { VAR10 (LOGICBINOP, vand,
14803 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14804 { VAR10 (LOGICBINOP, vorr,
14805 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14806 { VAR10 (BINOP, veor,
14807 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14808 { VAR10 (LOGICBINOP, vbic,
14809 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
14810 { VAR10 (LOGICBINOP, vorn,
14811 v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
14814 #undef CF
14815 #undef VAR1
14816 #undef VAR2
14817 #undef VAR3
14818 #undef VAR4
14819 #undef VAR5
14820 #undef VAR6
14821 #undef VAR7
14822 #undef VAR8
14823 #undef VAR9
14824 #undef VAR10
14826 static void
14827 arm_init_neon_builtins (void)
14829 unsigned int i, fcode = ARM_BUILTIN_NEON_BASE;
14831 /* Create distinguished type nodes for NEON vector element types,
14832 and pointers to values of such types, so we can detect them later. */
14833 tree neon_intQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14834 tree neon_intHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14835 tree neon_polyQI_type_node = make_signed_type (GET_MODE_PRECISION (QImode));
14836 tree neon_polyHI_type_node = make_signed_type (GET_MODE_PRECISION (HImode));
14837 tree neon_intSI_type_node = make_signed_type (GET_MODE_PRECISION (SImode));
14838 tree neon_intDI_type_node = make_signed_type (GET_MODE_PRECISION (DImode));
14839 tree neon_float_type_node = make_node (REAL_TYPE);
14840 TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
14841 layout_type (neon_float_type_node);
14843 /* Define typedefs which exactly correspond to the modes we are basing vector
14844 types on. If you change these names you'll need to change
14845 the table used by arm_mangle_type too. */
14846 (*lang_hooks.types.register_builtin_type) (neon_intQI_type_node,
14847 "__builtin_neon_qi");
14848 (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
14849 "__builtin_neon_hi");
14850 (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
14851 "__builtin_neon_si");
14852 (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
14853 "__builtin_neon_sf");
14854 (*lang_hooks.types.register_builtin_type) (neon_intDI_type_node,
14855 "__builtin_neon_di");
14857 (*lang_hooks.types.register_builtin_type) (neon_polyQI_type_node,
14858 "__builtin_neon_poly8");
14859 (*lang_hooks.types.register_builtin_type) (neon_polyHI_type_node,
14860 "__builtin_neon_poly16");
14862 tree intQI_pointer_node = build_pointer_type (neon_intQI_type_node);
14863 tree intHI_pointer_node = build_pointer_type (neon_intHI_type_node);
14864 tree intSI_pointer_node = build_pointer_type (neon_intSI_type_node);
14865 tree intDI_pointer_node = build_pointer_type (neon_intDI_type_node);
14866 tree float_pointer_node = build_pointer_type (neon_float_type_node);
14868 /* Next create constant-qualified versions of the above types. */
14869 tree const_intQI_node = build_qualified_type (neon_intQI_type_node,
14870 TYPE_QUAL_CONST);
14871 tree const_intHI_node = build_qualified_type (neon_intHI_type_node,
14872 TYPE_QUAL_CONST);
14873 tree const_intSI_node = build_qualified_type (neon_intSI_type_node,
14874 TYPE_QUAL_CONST);
14875 tree const_intDI_node = build_qualified_type (neon_intDI_type_node,
14876 TYPE_QUAL_CONST);
14877 tree const_float_node = build_qualified_type (neon_float_type_node,
14878 TYPE_QUAL_CONST);
14880 tree const_intQI_pointer_node = build_pointer_type (const_intQI_node);
14881 tree const_intHI_pointer_node = build_pointer_type (const_intHI_node);
14882 tree const_intSI_pointer_node = build_pointer_type (const_intSI_node);
14883 tree const_intDI_pointer_node = build_pointer_type (const_intDI_node);
14884 tree const_float_pointer_node = build_pointer_type (const_float_node);
14886 /* Now create vector types based on our NEON element types. */
14887 /* 64-bit vectors. */
14888 tree V8QI_type_node =
14889 build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
14890 tree V4HI_type_node =
14891 build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
14892 tree V2SI_type_node =
14893 build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
14894 tree V2SF_type_node =
14895 build_vector_type_for_mode (neon_float_type_node, V2SFmode);
14896 /* 128-bit vectors. */
14897 tree V16QI_type_node =
14898 build_vector_type_for_mode (neon_intQI_type_node, V16QImode);
14899 tree V8HI_type_node =
14900 build_vector_type_for_mode (neon_intHI_type_node, V8HImode);
14901 tree V4SI_type_node =
14902 build_vector_type_for_mode (neon_intSI_type_node, V4SImode);
14903 tree V4SF_type_node =
14904 build_vector_type_for_mode (neon_float_type_node, V4SFmode);
14905 tree V2DI_type_node =
14906 build_vector_type_for_mode (neon_intDI_type_node, V2DImode);
14908 /* Unsigned integer types for various mode sizes. */
14909 tree intUQI_type_node = make_unsigned_type (GET_MODE_PRECISION (QImode));
14910 tree intUHI_type_node = make_unsigned_type (GET_MODE_PRECISION (HImode));
14911 tree intUSI_type_node = make_unsigned_type (GET_MODE_PRECISION (SImode));
14912 tree intUDI_type_node = make_unsigned_type (GET_MODE_PRECISION (DImode));
14914 (*lang_hooks.types.register_builtin_type) (intUQI_type_node,
14915 "__builtin_neon_uqi");
14916 (*lang_hooks.types.register_builtin_type) (intUHI_type_node,
14917 "__builtin_neon_uhi");
14918 (*lang_hooks.types.register_builtin_type) (intUSI_type_node,
14919 "__builtin_neon_usi");
14920 (*lang_hooks.types.register_builtin_type) (intUDI_type_node,
14921 "__builtin_neon_udi");
14923 /* Opaque integer types for structures of vectors. */
14924 tree intEI_type_node = make_signed_type (GET_MODE_PRECISION (EImode));
14925 tree intOI_type_node = make_signed_type (GET_MODE_PRECISION (OImode));
14926 tree intCI_type_node = make_signed_type (GET_MODE_PRECISION (CImode));
14927 tree intXI_type_node = make_signed_type (GET_MODE_PRECISION (XImode));
14929 (*lang_hooks.types.register_builtin_type) (intTI_type_node,
14930 "__builtin_neon_ti");
14931 (*lang_hooks.types.register_builtin_type) (intEI_type_node,
14932 "__builtin_neon_ei");
14933 (*lang_hooks.types.register_builtin_type) (intOI_type_node,
14934 "__builtin_neon_oi");
14935 (*lang_hooks.types.register_builtin_type) (intCI_type_node,
14936 "__builtin_neon_ci");
14937 (*lang_hooks.types.register_builtin_type) (intXI_type_node,
14938 "__builtin_neon_xi");
14940 /* Pointers to vector types. */
14941 tree V8QI_pointer_node = build_pointer_type (V8QI_type_node);
14942 tree V4HI_pointer_node = build_pointer_type (V4HI_type_node);
14943 tree V2SI_pointer_node = build_pointer_type (V2SI_type_node);
14944 tree V2SF_pointer_node = build_pointer_type (V2SF_type_node);
14945 tree V16QI_pointer_node = build_pointer_type (V16QI_type_node);
14946 tree V8HI_pointer_node = build_pointer_type (V8HI_type_node);
14947 tree V4SI_pointer_node = build_pointer_type (V4SI_type_node);
14948 tree V4SF_pointer_node = build_pointer_type (V4SF_type_node);
14949 tree V2DI_pointer_node = build_pointer_type (V2DI_type_node);
14951 /* Operations which return results as pairs. */
14952 tree void_ftype_pv8qi_v8qi_v8qi =
14953 build_function_type_list (void_type_node, V8QI_pointer_node, V8QI_type_node,
14954 V8QI_type_node, NULL);
14955 tree void_ftype_pv4hi_v4hi_v4hi =
14956 build_function_type_list (void_type_node, V4HI_pointer_node, V4HI_type_node,
14957 V4HI_type_node, NULL);
14958 tree void_ftype_pv2si_v2si_v2si =
14959 build_function_type_list (void_type_node, V2SI_pointer_node, V2SI_type_node,
14960 V2SI_type_node, NULL);
14961 tree void_ftype_pv2sf_v2sf_v2sf =
14962 build_function_type_list (void_type_node, V2SF_pointer_node, V2SF_type_node,
14963 V2SF_type_node, NULL);
14964 tree void_ftype_pdi_di_di =
14965 build_function_type_list (void_type_node, intDI_pointer_node,
14966 neon_intDI_type_node, neon_intDI_type_node, NULL);
14967 tree void_ftype_pv16qi_v16qi_v16qi =
14968 build_function_type_list (void_type_node, V16QI_pointer_node,
14969 V16QI_type_node, V16QI_type_node, NULL);
14970 tree void_ftype_pv8hi_v8hi_v8hi =
14971 build_function_type_list (void_type_node, V8HI_pointer_node, V8HI_type_node,
14972 V8HI_type_node, NULL);
14973 tree void_ftype_pv4si_v4si_v4si =
14974 build_function_type_list (void_type_node, V4SI_pointer_node, V4SI_type_node,
14975 V4SI_type_node, NULL);
14976 tree void_ftype_pv4sf_v4sf_v4sf =
14977 build_function_type_list (void_type_node, V4SF_pointer_node, V4SF_type_node,
14978 V4SF_type_node, NULL);
14979 tree void_ftype_pv2di_v2di_v2di =
14980 build_function_type_list (void_type_node, V2DI_pointer_node, V2DI_type_node,
14981 V2DI_type_node, NULL);
14983 tree reinterp_ftype_dreg[5][5];
14984 tree reinterp_ftype_qreg[5][5];
14985 tree dreg_types[5], qreg_types[5];
14987 dreg_types[0] = V8QI_type_node;
14988 dreg_types[1] = V4HI_type_node;
14989 dreg_types[2] = V2SI_type_node;
14990 dreg_types[3] = V2SF_type_node;
14991 dreg_types[4] = neon_intDI_type_node;
14993 qreg_types[0] = V16QI_type_node;
14994 qreg_types[1] = V8HI_type_node;
14995 qreg_types[2] = V4SI_type_node;
14996 qreg_types[3] = V4SF_type_node;
14997 qreg_types[4] = V2DI_type_node;
14999 for (i = 0; i < 5; i++)
15001 int j;
15002 for (j = 0; j < 5; j++)
15004 reinterp_ftype_dreg[i][j]
15005 = build_function_type_list (dreg_types[i], dreg_types[j], NULL);
15006 reinterp_ftype_qreg[i][j]
15007 = build_function_type_list (qreg_types[i], qreg_types[j], NULL);
15011 for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
15013 neon_builtin_datum *d = &neon_builtin_data[i];
15014 unsigned int j, codeidx = 0;
15016 d->base_fcode = fcode;
15018 for (j = 0; j < T_MAX; j++)
15020 const char* const modenames[] = {
15021 "v8qi", "v4hi", "v2si", "v2sf", "di",
15022 "v16qi", "v8hi", "v4si", "v4sf", "v2di"
15024 char namebuf[60];
15025 tree ftype = NULL;
15026 enum insn_code icode;
15027 int is_load = 0, is_store = 0;
15029 if ((d->bits & (1 << j)) == 0)
15030 continue;
15032 icode = d->codes[codeidx++];
15034 switch (d->itype)
15036 case NEON_LOAD1:
15037 case NEON_LOAD1LANE:
15038 case NEON_LOADSTRUCT:
15039 case NEON_LOADSTRUCTLANE:
15040 is_load = 1;
15041 /* Fall through. */
15042 case NEON_STORE1:
15043 case NEON_STORE1LANE:
15044 case NEON_STORESTRUCT:
15045 case NEON_STORESTRUCTLANE:
15046 if (!is_load)
15047 is_store = 1;
15048 /* Fall through. */
15049 case NEON_UNOP:
15050 case NEON_BINOP:
15051 case NEON_LOGICBINOP:
15052 case NEON_SHIFTINSERT:
15053 case NEON_TERNOP:
15054 case NEON_GETLANE:
15055 case NEON_SETLANE:
15056 case NEON_CREATE:
15057 case NEON_DUP:
15058 case NEON_DUPLANE:
15059 case NEON_SHIFTIMM:
15060 case NEON_SHIFTACC:
15061 case NEON_COMBINE:
15062 case NEON_SPLIT:
15063 case NEON_CONVERT:
15064 case NEON_FIXCONV:
15065 case NEON_LANEMUL:
15066 case NEON_LANEMULL:
15067 case NEON_LANEMULH:
15068 case NEON_LANEMAC:
15069 case NEON_SCALARMUL:
15070 case NEON_SCALARMULL:
15071 case NEON_SCALARMULH:
15072 case NEON_SCALARMAC:
15073 case NEON_SELECT:
15074 case NEON_VTBL:
15075 case NEON_VTBX:
15077 int k;
15078 tree return_type = void_type_node, args = void_list_node;
15080 /* Build a function type directly from the insn_data for this
15081 builtin. The build_function_type() function takes care of
15082 removing duplicates for us. */
15083 for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
15085 tree eltype;
15087 if (is_load && k == 1)
15089 /* Neon load patterns always have the memory operand
15090 (a SImode pointer) in the operand 1 position. We
15091 want a const pointer to the element type in that
15092 position. */
15093 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15095 switch (1 << j)
15097 case T_V8QI:
15098 case T_V16QI:
15099 eltype = const_intQI_pointer_node;
15100 break;
15102 case T_V4HI:
15103 case T_V8HI:
15104 eltype = const_intHI_pointer_node;
15105 break;
15107 case T_V2SI:
15108 case T_V4SI:
15109 eltype = const_intSI_pointer_node;
15110 break;
15112 case T_V2SF:
15113 case T_V4SF:
15114 eltype = const_float_pointer_node;
15115 break;
15117 case T_DI:
15118 case T_V2DI:
15119 eltype = const_intDI_pointer_node;
15120 break;
15122 default: gcc_unreachable ();
15125 else if (is_store && k == 0)
15127 /* Similarly, Neon store patterns use operand 0 as
15128 the memory location to store to (a SImode pointer).
15129 Use a pointer to the element type of the store in
15130 that position. */
15131 gcc_assert (insn_data[icode].operand[k].mode == SImode);
15133 switch (1 << j)
15135 case T_V8QI:
15136 case T_V16QI:
15137 eltype = intQI_pointer_node;
15138 break;
15140 case T_V4HI:
15141 case T_V8HI:
15142 eltype = intHI_pointer_node;
15143 break;
15145 case T_V2SI:
15146 case T_V4SI:
15147 eltype = intSI_pointer_node;
15148 break;
15150 case T_V2SF:
15151 case T_V4SF:
15152 eltype = float_pointer_node;
15153 break;
15155 case T_DI:
15156 case T_V2DI:
15157 eltype = intDI_pointer_node;
15158 break;
15160 default: gcc_unreachable ();
15163 else
15165 switch (insn_data[icode].operand[k].mode)
15167 case VOIDmode: eltype = void_type_node; break;
15168 /* Scalars. */
15169 case QImode: eltype = neon_intQI_type_node; break;
15170 case HImode: eltype = neon_intHI_type_node; break;
15171 case SImode: eltype = neon_intSI_type_node; break;
15172 case SFmode: eltype = neon_float_type_node; break;
15173 case DImode: eltype = neon_intDI_type_node; break;
15174 case TImode: eltype = intTI_type_node; break;
15175 case EImode: eltype = intEI_type_node; break;
15176 case OImode: eltype = intOI_type_node; break;
15177 case CImode: eltype = intCI_type_node; break;
15178 case XImode: eltype = intXI_type_node; break;
15179 /* 64-bit vectors. */
15180 case V8QImode: eltype = V8QI_type_node; break;
15181 case V4HImode: eltype = V4HI_type_node; break;
15182 case V2SImode: eltype = V2SI_type_node; break;
15183 case V2SFmode: eltype = V2SF_type_node; break;
15184 /* 128-bit vectors. */
15185 case V16QImode: eltype = V16QI_type_node; break;
15186 case V8HImode: eltype = V8HI_type_node; break;
15187 case V4SImode: eltype = V4SI_type_node; break;
15188 case V4SFmode: eltype = V4SF_type_node; break;
15189 case V2DImode: eltype = V2DI_type_node; break;
15190 default: gcc_unreachable ();
15194 if (k == 0 && !is_store)
15195 return_type = eltype;
15196 else
15197 args = tree_cons (NULL_TREE, eltype, args);
15200 ftype = build_function_type (return_type, args);
15202 break;
15204 case NEON_RESULTPAIR:
15206 switch (insn_data[icode].operand[1].mode)
15208 case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
15209 case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
15210 case V2SImode: ftype = void_ftype_pv2si_v2si_v2si; break;
15211 case V2SFmode: ftype = void_ftype_pv2sf_v2sf_v2sf; break;
15212 case DImode: ftype = void_ftype_pdi_di_di; break;
15213 case V16QImode: ftype = void_ftype_pv16qi_v16qi_v16qi; break;
15214 case V8HImode: ftype = void_ftype_pv8hi_v8hi_v8hi; break;
15215 case V4SImode: ftype = void_ftype_pv4si_v4si_v4si; break;
15216 case V4SFmode: ftype = void_ftype_pv4sf_v4sf_v4sf; break;
15217 case V2DImode: ftype = void_ftype_pv2di_v2di_v2di; break;
15218 default: gcc_unreachable ();
15221 break;
15223 case NEON_REINTERP:
15225 /* We iterate over 5 doubleword types, then 5 quadword
15226 types. */
15227 int rhs = j % 5;
15228 switch (insn_data[icode].operand[0].mode)
15230 case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
15231 case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
15232 case V2SImode: ftype = reinterp_ftype_dreg[2][rhs]; break;
15233 case V2SFmode: ftype = reinterp_ftype_dreg[3][rhs]; break;
15234 case DImode: ftype = reinterp_ftype_dreg[4][rhs]; break;
15235 case V16QImode: ftype = reinterp_ftype_qreg[0][rhs]; break;
15236 case V8HImode: ftype = reinterp_ftype_qreg[1][rhs]; break;
15237 case V4SImode: ftype = reinterp_ftype_qreg[2][rhs]; break;
15238 case V4SFmode: ftype = reinterp_ftype_qreg[3][rhs]; break;
15239 case V2DImode: ftype = reinterp_ftype_qreg[4][rhs]; break;
15240 default: gcc_unreachable ();
15243 break;
15245 default:
15246 gcc_unreachable ();
15249 gcc_assert (ftype != NULL);
15251 sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
15253 add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
15254 NULL_TREE);
15259 static void
15260 arm_init_builtins (void)
15262 arm_init_tls_builtins ();
15264 if (TARGET_REALLY_IWMMXT)
15265 arm_init_iwmmxt_builtins ();
15267 if (TARGET_NEON)
15268 arm_init_neon_builtins ();
15271 /* Errors in the source file can cause expand_expr to return const0_rtx
15272 where we expect a vector. To avoid crashing, use one of the vector
15273 clear instructions. */
15275 static rtx
15276 safe_vector_operand (rtx x, enum machine_mode mode)
15278 if (x != const0_rtx)
15279 return x;
15280 x = gen_reg_rtx (mode);
15282 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
15283 : gen_rtx_SUBREG (DImode, x, 0)));
15284 return x;
15287 /* Subroutine of arm_expand_builtin to take care of binop insns. */
15289 static rtx
15290 arm_expand_binop_builtin (enum insn_code icode,
15291 tree exp, rtx target)
15293 rtx pat;
15294 tree arg0 = CALL_EXPR_ARG (exp, 0);
15295 tree arg1 = CALL_EXPR_ARG (exp, 1);
15296 rtx op0 = expand_normal (arg0);
15297 rtx op1 = expand_normal (arg1);
15298 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15299 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15300 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
15302 if (VECTOR_MODE_P (mode0))
15303 op0 = safe_vector_operand (op0, mode0);
15304 if (VECTOR_MODE_P (mode1))
15305 op1 = safe_vector_operand (op1, mode1);
15307 if (! target
15308 || GET_MODE (target) != tmode
15309 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15310 target = gen_reg_rtx (tmode);
15312 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
15314 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15315 op0 = copy_to_mode_reg (mode0, op0);
15316 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15317 op1 = copy_to_mode_reg (mode1, op1);
15319 pat = GEN_FCN (icode) (target, op0, op1);
15320 if (! pat)
15321 return 0;
15322 emit_insn (pat);
15323 return target;
15326 /* Subroutine of arm_expand_builtin to take care of unop insns. */
15328 static rtx
15329 arm_expand_unop_builtin (enum insn_code icode,
15330 tree exp, rtx target, int do_load)
15332 rtx pat;
15333 tree arg0 = CALL_EXPR_ARG (exp, 0);
15334 rtx op0 = expand_normal (arg0);
15335 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15336 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15338 if (! target
15339 || GET_MODE (target) != tmode
15340 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15341 target = gen_reg_rtx (tmode);
15342 if (do_load)
15343 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15344 else
15346 if (VECTOR_MODE_P (mode0))
15347 op0 = safe_vector_operand (op0, mode0);
15349 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15350 op0 = copy_to_mode_reg (mode0, op0);
15353 pat = GEN_FCN (icode) (target, op0);
15354 if (! pat)
15355 return 0;
15356 emit_insn (pat);
15357 return target;
15360 static int
15361 neon_builtin_compare (const void *a, const void *b)
15363 const neon_builtin_datum *key = a;
15364 const neon_builtin_datum *memb = b;
15365 unsigned int soughtcode = key->base_fcode;
15367 if (soughtcode >= memb->base_fcode
15368 && soughtcode < memb->base_fcode + memb->num_vars)
15369 return 0;
15370 else if (soughtcode < memb->base_fcode)
15371 return -1;
15372 else
15373 return 1;
15376 static enum insn_code
15377 locate_neon_builtin_icode (int fcode, neon_itype *itype)
15379 neon_builtin_datum key, *found;
15380 int idx;
15382 key.base_fcode = fcode;
15383 found = bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
15384 sizeof (neon_builtin_data[0]), neon_builtin_compare);
15385 gcc_assert (found);
15386 idx = fcode - (int) found->base_fcode;
15387 gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
15389 if (itype)
15390 *itype = found->itype;
15392 return found->codes[idx];
15395 typedef enum {
15396 NEON_ARG_COPY_TO_REG,
15397 NEON_ARG_CONSTANT,
15398 NEON_ARG_STOP
15399 } builtin_arg;
15401 #define NEON_MAX_BUILTIN_ARGS 5
15403 /* Expand a Neon builtin. */
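/* The variable arguments are one builtin_arg code per insn operand
   (skipping the result when HAVE_RETVAL is nonzero), terminated by
   NEON_ARG_STOP.  For example, a builtin with two register operands and
   a trailing immediate (the NEON_BINOP shape used further down) would
   be expanded roughly as:

     arm_expand_neon_args (target, icode, 1, exp,
                           NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
                           NEON_ARG_CONSTANT, NEON_ARG_STOP);  */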
15404 static rtx
15405 arm_expand_neon_args (rtx target, int icode, int have_retval,
15406 tree exp, ...)
15408 va_list ap;
15409 rtx pat;
15410 tree arg[NEON_MAX_BUILTIN_ARGS];
15411 rtx op[NEON_MAX_BUILTIN_ARGS];
15412 enum machine_mode tmode = insn_data[icode].operand[0].mode;
15413 enum machine_mode mode[NEON_MAX_BUILTIN_ARGS];
15414 int argc = 0;
15416 if (have_retval
15417 && (!target
15418 || GET_MODE (target) != tmode
15419 || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
15420 target = gen_reg_rtx (tmode);
15422 va_start (ap, exp);
15424 for (;;)
15426 builtin_arg thisarg = va_arg (ap, int);
15428 if (thisarg == NEON_ARG_STOP)
15429 break;
15430 else
15432 arg[argc] = CALL_EXPR_ARG (exp, argc);
15433 op[argc] = expand_normal (arg[argc]);
15434 mode[argc] = insn_data[icode].operand[argc + have_retval].mode;
15436 switch (thisarg)
15438 case NEON_ARG_COPY_TO_REG:
15439 /*gcc_assert (GET_MODE (op[argc]) == mode[argc]);*/
15440 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15441 (op[argc], mode[argc]))
15442 op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
15443 break;
15445 case NEON_ARG_CONSTANT:
15446 /* FIXME: This error message is somewhat unhelpful. */
15447 if (!(*insn_data[icode].operand[argc + have_retval].predicate)
15448 (op[argc], mode[argc]))
15449 error ("argument must be a constant");
15450 break;
15452 case NEON_ARG_STOP:
15453 gcc_unreachable ();
15456 argc++;
15460 va_end (ap);
15462 if (have_retval)
15463 switch (argc)
15465 case 1:
15466 pat = GEN_FCN (icode) (target, op[0]);
15467 break;
15469 case 2:
15470 pat = GEN_FCN (icode) (target, op[0], op[1]);
15471 break;
15473 case 3:
15474 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
15475 break;
15477 case 4:
15478 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
15479 break;
15481 case 5:
15482 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
15483 break;
15485 default:
15486 gcc_unreachable ();
15488 else
15489 switch (argc)
15491 case 1:
15492 pat = GEN_FCN (icode) (op[0]);
15493 break;
15495 case 2:
15496 pat = GEN_FCN (icode) (op[0], op[1]);
15497 break;
15499 case 3:
15500 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
15501 break;
15503 case 4:
15504 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
15505 break;
15507 case 5:
15508 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
15509 break;
15511 default:
15512 gcc_unreachable ();
15515 if (!pat)
15516 return 0;
15518 emit_insn (pat);
15520 return target;
15523 /* Expand a Neon builtin. These are "special" because they don't have symbolic
15524 constants defined per-instruction or per instruction-variant. Instead, the
15525 required info is looked up in the table neon_builtin_data. */
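/* locate_neon_builtin_icode bsearches that table by base_fcode; the
   difference fcode - base_fcode then selects the per-mode variant of
   the builtin (e.g. the v8qi versus v16qi form).  */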
15526 static rtx
15527 arm_expand_neon_builtin (int fcode, tree exp, rtx target)
15529 neon_itype itype;
15530 enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
15532 switch (itype)
15534 case NEON_UNOP:
15535 case NEON_CONVERT:
15536 case NEON_DUPLANE:
15537 return arm_expand_neon_args (target, icode, 1, exp,
15538 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15540 case NEON_BINOP:
15541 case NEON_SETLANE:
15542 case NEON_SCALARMUL:
15543 case NEON_SCALARMULL:
15544 case NEON_SCALARMULH:
15545 case NEON_SHIFTINSERT:
15546 case NEON_LOGICBINOP:
15547 return arm_expand_neon_args (target, icode, 1, exp,
15548 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15549 NEON_ARG_STOP);
15551 case NEON_TERNOP:
15552 return arm_expand_neon_args (target, icode, 1, exp,
15553 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15554 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15556 case NEON_GETLANE:
15557 case NEON_FIXCONV:
15558 case NEON_SHIFTIMM:
15559 return arm_expand_neon_args (target, icode, 1, exp,
15560 NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT, NEON_ARG_CONSTANT,
15561 NEON_ARG_STOP);
15563 case NEON_CREATE:
15564 return arm_expand_neon_args (target, icode, 1, exp,
15565 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15567 case NEON_DUP:
15568 case NEON_SPLIT:
15569 case NEON_REINTERP:
15570 return arm_expand_neon_args (target, icode, 1, exp,
15571 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15573 case NEON_COMBINE:
15574 case NEON_VTBL:
15575 return arm_expand_neon_args (target, icode, 1, exp,
15576 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15578 case NEON_RESULTPAIR:
15579 return arm_expand_neon_args (target, icode, 0, exp,
15580 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15581 NEON_ARG_STOP);
15583 case NEON_LANEMUL:
15584 case NEON_LANEMULL:
15585 case NEON_LANEMULH:
15586 return arm_expand_neon_args (target, icode, 1, exp,
15587 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15588 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15590 case NEON_LANEMAC:
15591 return arm_expand_neon_args (target, icode, 1, exp,
15592 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15593 NEON_ARG_CONSTANT, NEON_ARG_CONSTANT, NEON_ARG_STOP);
15595 case NEON_SHIFTACC:
15596 return arm_expand_neon_args (target, icode, 1, exp,
15597 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15598 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15600 case NEON_SCALARMAC:
15601 return arm_expand_neon_args (target, icode, 1, exp,
15602 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15603 NEON_ARG_CONSTANT, NEON_ARG_STOP);
15605 case NEON_SELECT:
15606 case NEON_VTBX:
15607 return arm_expand_neon_args (target, icode, 1, exp,
15608 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG,
15609 NEON_ARG_STOP);
15611 case NEON_LOAD1:
15612 case NEON_LOADSTRUCT:
15613 return arm_expand_neon_args (target, icode, 1, exp,
15614 NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15616 case NEON_LOAD1LANE:
15617 case NEON_LOADSTRUCTLANE:
15618 return arm_expand_neon_args (target, icode, 1, exp,
15619 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15620 NEON_ARG_STOP);
15622 case NEON_STORE1:
15623 case NEON_STORESTRUCT:
15624 return arm_expand_neon_args (target, icode, 0, exp,
15625 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
15627 case NEON_STORE1LANE:
15628 case NEON_STORESTRUCTLANE:
15629 return arm_expand_neon_args (target, icode, 0, exp,
15630 NEON_ARG_COPY_TO_REG, NEON_ARG_COPY_TO_REG, NEON_ARG_CONSTANT,
15631 NEON_ARG_STOP);
15634 gcc_unreachable ();
15637 /* Emit code to reinterpret one Neon type as another, without altering bits. */
15638 void
15639 neon_reinterpret (rtx dest, rtx src)
15641 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
15644 /* Emit code to place a Neon pair result in memory locations (with equal
15645 registers). */
15646 void
15647 neon_emit_pair_result_insn (enum machine_mode mode,
15648 rtx (*intfn) (rtx, rtx, rtx, rtx), rtx destaddr,
15649 rtx op1, rtx op2)
15651 rtx mem = gen_rtx_MEM (mode, destaddr);
15652 rtx tmp1 = gen_reg_rtx (mode);
15653 rtx tmp2 = gen_reg_rtx (mode);
15655 emit_insn (intfn (tmp1, op1, tmp2, op2));
15657 emit_move_insn (mem, tmp1);
15658 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
15659 emit_move_insn (mem, tmp2);
15662 /* Set up operands for a register copy from src to dest, taking care not to
15663 clobber registers in the process.
15664 FIXME: This has rather high polynomial complexity (O(n^3)?) but shouldn't
15665 be called with a large N, so that should be OK. */
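/* For example, with COUNT == 2, DEST == {d0, d1} and SRC == {d1, d2}
   (d1 is both a destination and a still-needed source), the d0 := d1
   copy is scheduled first, since no pending source overlaps d0, and
   d1 := d2 follows; emitting d1 := d2 first would clobber the value
   still needed for d0.  */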
15667 void
15668 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
15670 unsigned int copied = 0, opctr = 0;
15671 unsigned int done = (1 << count) - 1;
15672 unsigned int i, j;
15674 while (copied != done)
15676 for (i = 0; i < count; i++)
15678 int good = 1;
15680 for (j = 0; good && j < count; j++)
15681 if (i != j && (copied & (1 << j)) == 0
15682 && reg_overlap_mentioned_p (src[j], dest[i]))
15683 good = 0;
15685 if (good)
15687 operands[opctr++] = dest[i];
15688 operands[opctr++] = src[i];
15689 copied |= 1 << i;
15694 gcc_assert (opctr == count * 2);
15697 /* Expand an expression EXP that calls a built-in function,
15698 with result going to TARGET if that's convenient
15699 (and in mode MODE if that's convenient).
15700 SUBTARGET may be used as the target for computing one of EXP's operands.
15701 IGNORE is nonzero if the value is to be ignored. */
15703 static rtx
15704 arm_expand_builtin (tree exp,
15705 rtx target,
15706 rtx subtarget ATTRIBUTE_UNUSED,
15707 enum machine_mode mode ATTRIBUTE_UNUSED,
15708 int ignore ATTRIBUTE_UNUSED)
15710 const struct builtin_description * d;
15711 enum insn_code icode;
15712 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
15713 tree arg0;
15714 tree arg1;
15715 tree arg2;
15716 rtx op0;
15717 rtx op1;
15718 rtx op2;
15719 rtx pat;
15720 int fcode = DECL_FUNCTION_CODE (fndecl);
15721 size_t i;
15722 enum machine_mode tmode;
15723 enum machine_mode mode0;
15724 enum machine_mode mode1;
15725 enum machine_mode mode2;
15727 if (fcode >= ARM_BUILTIN_NEON_BASE)
15728 return arm_expand_neon_builtin (fcode, exp, target);
15730 switch (fcode)
15732 case ARM_BUILTIN_TEXTRMSB:
15733 case ARM_BUILTIN_TEXTRMUB:
15734 case ARM_BUILTIN_TEXTRMSH:
15735 case ARM_BUILTIN_TEXTRMUH:
15736 case ARM_BUILTIN_TEXTRMSW:
15737 case ARM_BUILTIN_TEXTRMUW:
15738 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
15739 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
15740 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
15741 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
15742 : CODE_FOR_iwmmxt_textrmw);
15744 arg0 = CALL_EXPR_ARG (exp, 0);
15745 arg1 = CALL_EXPR_ARG (exp, 1);
15746 op0 = expand_normal (arg0);
15747 op1 = expand_normal (arg1);
15748 tmode = insn_data[icode].operand[0].mode;
15749 mode0 = insn_data[icode].operand[1].mode;
15750 mode1 = insn_data[icode].operand[2].mode;
15752 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15753 op0 = copy_to_mode_reg (mode0, op0);
15754 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15756 /* @@@ better error message */
15757 error ("selector must be an immediate");
15758 return gen_reg_rtx (tmode);
15760 if (target == 0
15761 || GET_MODE (target) != tmode
15762 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15763 target = gen_reg_rtx (tmode);
15764 pat = GEN_FCN (icode) (target, op0, op1);
15765 if (! pat)
15766 return 0;
15767 emit_insn (pat);
15768 return target;
15770 case ARM_BUILTIN_TINSRB:
15771 case ARM_BUILTIN_TINSRH:
15772 case ARM_BUILTIN_TINSRW:
15773 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
15774 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
15775 : CODE_FOR_iwmmxt_tinsrw);
15776 arg0 = CALL_EXPR_ARG (exp, 0);
15777 arg1 = CALL_EXPR_ARG (exp, 1);
15778 arg2 = CALL_EXPR_ARG (exp, 2);
15779 op0 = expand_normal (arg0);
15780 op1 = expand_normal (arg1);
15781 op2 = expand_normal (arg2);
15782 tmode = insn_data[icode].operand[0].mode;
15783 mode0 = insn_data[icode].operand[1].mode;
15784 mode1 = insn_data[icode].operand[2].mode;
15785 mode2 = insn_data[icode].operand[3].mode;
15787 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15788 op0 = copy_to_mode_reg (mode0, op0);
15789 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15790 op1 = copy_to_mode_reg (mode1, op1);
15791 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15793 /* @@@ better error message */
15794 error ("selector must be an immediate");
15795 return const0_rtx;
15797 if (target == 0
15798 || GET_MODE (target) != tmode
15799 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15800 target = gen_reg_rtx (tmode);
15801 pat = GEN_FCN (icode) (target, op0, op1, op2);
15802 if (! pat)
15803 return 0;
15804 emit_insn (pat);
15805 return target;
15807 case ARM_BUILTIN_SETWCX:
15808 arg0 = CALL_EXPR_ARG (exp, 0);
15809 arg1 = CALL_EXPR_ARG (exp, 1);
15810 op0 = force_reg (SImode, expand_normal (arg0));
15811 op1 = expand_normal (arg1);
15812 emit_insn (gen_iwmmxt_tmcr (op1, op0));
15813 return 0;
15815 case ARM_BUILTIN_GETWCX:
15816 arg0 = CALL_EXPR_ARG (exp, 0);
15817 op0 = expand_normal (arg0);
15818 target = gen_reg_rtx (SImode);
15819 emit_insn (gen_iwmmxt_tmrc (target, op0));
15820 return target;
15822 case ARM_BUILTIN_WSHUFH:
15823 icode = CODE_FOR_iwmmxt_wshufh;
15824 arg0 = CALL_EXPR_ARG (exp, 0);
15825 arg1 = CALL_EXPR_ARG (exp, 1);
15826 op0 = expand_normal (arg0);
15827 op1 = expand_normal (arg1);
15828 tmode = insn_data[icode].operand[0].mode;
15829 mode1 = insn_data[icode].operand[1].mode;
15830 mode2 = insn_data[icode].operand[2].mode;
15832 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15833 op0 = copy_to_mode_reg (mode1, op0);
15834 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15836 /* @@@ better error message */
15837 error ("mask must be an immediate");
15838 return const0_rtx;
15840 if (target == 0
15841 || GET_MODE (target) != tmode
15842 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15843 target = gen_reg_rtx (tmode);
15844 pat = GEN_FCN (icode) (target, op0, op1);
15845 if (! pat)
15846 return 0;
15847 emit_insn (pat);
15848 return target;
15850 case ARM_BUILTIN_WSADB:
15851 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, exp, target);
15852 case ARM_BUILTIN_WSADH:
15853 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, exp, target);
15854 case ARM_BUILTIN_WSADBZ:
15855 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, exp, target);
15856 case ARM_BUILTIN_WSADHZ:
15857 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, exp, target);
15859 /* Several three-argument builtins. */
15860 case ARM_BUILTIN_WMACS:
15861 case ARM_BUILTIN_WMACU:
15862 case ARM_BUILTIN_WALIGN:
15863 case ARM_BUILTIN_TMIA:
15864 case ARM_BUILTIN_TMIAPH:
15865 case ARM_BUILTIN_TMIATT:
15866 case ARM_BUILTIN_TMIATB:
15867 case ARM_BUILTIN_TMIABT:
15868 case ARM_BUILTIN_TMIABB:
15869 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
15870 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
15871 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
15872 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
15873 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
15874 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
15875 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
15876 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
15877 : CODE_FOR_iwmmxt_walign);
15878 arg0 = CALL_EXPR_ARG (exp, 0);
15879 arg1 = CALL_EXPR_ARG (exp, 1);
15880 arg2 = CALL_EXPR_ARG (exp, 2);
15881 op0 = expand_normal (arg0);
15882 op1 = expand_normal (arg1);
15883 op2 = expand_normal (arg2);
15884 tmode = insn_data[icode].operand[0].mode;
15885 mode0 = insn_data[icode].operand[1].mode;
15886 mode1 = insn_data[icode].operand[2].mode;
15887 mode2 = insn_data[icode].operand[3].mode;
15889 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15890 op0 = copy_to_mode_reg (mode0, op0);
15891 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
15892 op1 = copy_to_mode_reg (mode1, op1);
15893 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15894 op2 = copy_to_mode_reg (mode2, op2);
15895 if (target == 0
15896 || GET_MODE (target) != tmode
15897 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15898 target = gen_reg_rtx (tmode);
15899 pat = GEN_FCN (icode) (target, op0, op1, op2);
15900 if (! pat)
15901 return 0;
15902 emit_insn (pat);
15903 return target;
15905 case ARM_BUILTIN_WZERO:
15906 target = gen_reg_rtx (DImode);
15907 emit_insn (gen_iwmmxt_clrdi (target));
15908 return target;
15910 case ARM_BUILTIN_THREAD_POINTER:
15911 return arm_load_tp (target);
15913 default:
15914 break;
15917 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15918 if (d->code == (const enum arm_builtins) fcode)
15919 return arm_expand_binop_builtin (d->icode, exp, target);
15921 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15922 if (d->code == (const enum arm_builtins) fcode)
15923 return arm_expand_unop_builtin (d->icode, exp, target, 0);
15925 /* @@@ Should really do something sensible here. */
15926 return NULL_RTX;
15929 /* Return the number (counting from 0) of
15930 the least significant set bit in MASK. */
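/* For example, number_of_first_bit_set (0x14) is 2.  MASK must be
   nonzero or the scan below will not terminate.  */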
15932 inline static int
15933 number_of_first_bit_set (unsigned mask)
15935 int bit;
15937 for (bit = 0;
15938 (mask & (1 << bit)) == 0;
15939 ++bit)
15940 continue;
15942 return bit;
15945 /* Emit code to push or pop registers to or from the stack. F is the
15946 assembly file. MASK is the registers to push or pop. PUSH is
15947 nonzero if we should push, and zero if we should pop. For debugging
15948 output, if pushing, adjust CFA_OFFSET by the amount of space added
15949 to the stack. REAL_REGS should have the same number of bits set as
15950 MASK, and will be used instead (in the same order) to describe which
15951 registers were saved - this is used to mark the save slots when we
15952 push high registers after moving them to low registers. */
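/* For instance, a MASK of 0x40F0 (r4-r7 and LR) with PUSH nonzero emits
   "push {r4, r5, r6, r7, lr}", preceded by a matching ".save" directive
   for EABI unwind tables, and (when emitting dwarf frame info) advances
   *CFA_OFFSET by the 20 bytes pushed.  */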
15953 static void
15954 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
15955 unsigned long real_regs)
15957 int regno;
15958 int lo_mask = mask & 0xFF;
15959 int pushed_words = 0;
15961 gcc_assert (mask);
15963 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
15965 /* Special case. Do not generate a POP PC statement here; do it in
15966 thumb_exit ().  */
15967 thumb_exit (f, -1);
15968 return;
15971 if (ARM_EABI_UNWIND_TABLES && push)
15973 fprintf (f, "\t.save\t{");
15974 for (regno = 0; regno < 15; regno++)
15976 if (real_regs & (1 << regno))
15978 if (real_regs & ((1 << regno) -1))
15979 fprintf (f, ", ");
15980 asm_fprintf (f, "%r", regno);
15983 fprintf (f, "}\n");
15986 fprintf (f, "\t%s\t{", push ? "push" : "pop");
15988 /* Look at the low registers first. */
15989 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
15991 if (lo_mask & 1)
15993 asm_fprintf (f, "%r", regno);
15995 if ((lo_mask & ~1) != 0)
15996 fprintf (f, ", ");
15998 pushed_words++;
16002 if (push && (mask & (1 << LR_REGNUM)))
16004 /* Catch pushing the LR. */
16005 if (mask & 0xFF)
16006 fprintf (f, ", ");
16008 asm_fprintf (f, "%r", LR_REGNUM);
16010 pushed_words++;
16012 else if (!push && (mask & (1 << PC_REGNUM)))
16014 /* Catch popping the PC. */
16015 if (TARGET_INTERWORK || TARGET_BACKTRACE
16016 || current_function_calls_eh_return)
16018 /* The PC is never popped directly; instead
16019 it is popped into r3 and then BX is used. */
16020 fprintf (f, "}\n");
16022 thumb_exit (f, -1);
16024 return;
16026 else
16028 if (mask & 0xFF)
16029 fprintf (f, ", ");
16031 asm_fprintf (f, "%r", PC_REGNUM);
16035 fprintf (f, "}\n");
16037 if (push && pushed_words && dwarf2out_do_frame ())
16039 char *l = dwarf2out_cfi_label ();
16040 int pushed_mask = real_regs;
16042 *cfa_offset += pushed_words * 4;
16043 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
16045 pushed_words = 0;
16046 pushed_mask = real_regs;
16047 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
16049 if (pushed_mask & 1)
16050 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
16055 /* Generate code to return from a thumb function.
16056 If 'reg_containing_return_addr' is -1, then the return address is
16057 actually on the stack, at the stack pointer. */
16058 static void
16059 thumb_exit (FILE *f, int reg_containing_return_addr)
16061 unsigned regs_available_for_popping;
16062 unsigned regs_to_pop;
16063 int pops_needed;
16064 unsigned available;
16065 unsigned required;
16066 int mode;
16067 int size;
16068 int restore_a4 = FALSE;
16070 /* Compute the registers we need to pop. */
16071 regs_to_pop = 0;
16072 pops_needed = 0;
16074 if (reg_containing_return_addr == -1)
16076 regs_to_pop |= 1 << LR_REGNUM;
16077 ++pops_needed;
16080 if (TARGET_BACKTRACE)
16082 /* Restore the (ARM) frame pointer and stack pointer. */
16083 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
16084 pops_needed += 2;
16087 /* If there is nothing to pop then just emit the BX instruction and
16088 return. */
16089 if (pops_needed == 0)
16091 if (current_function_calls_eh_return)
16092 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16094 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16095 return;
16097 /* Otherwise if we are not supporting interworking and we have not created
16098 a backtrace structure and the function was not entered in ARM mode then
16099 just pop the return address straight into the PC. */
16100 else if (!TARGET_INTERWORK
16101 && !TARGET_BACKTRACE
16102 && !is_called_in_ARM_mode (current_function_decl)
16103 && !current_function_calls_eh_return)
16105 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
16106 return;
16109 /* Find out how many of the (return) argument registers we can corrupt. */
16110 regs_available_for_popping = 0;
16112 /* If returning via __builtin_eh_return, the bottom three registers
16113 all contain information needed for the return. */
16114 if (current_function_calls_eh_return)
16115 size = 12;
16116 else
16118 /* We can deduce the registers used from the function's
16119 return value. This is more reliable than examining
16120 df_regs_ever_live_p () because that will be set if the register is
16121 ever used in the function, not just if the register is used
16122 to hold a return value. */
16124 if (current_function_return_rtx != 0)
16125 mode = GET_MODE (current_function_return_rtx);
16126 else
16127 mode = DECL_MODE (DECL_RESULT (current_function_decl));
16129 size = GET_MODE_SIZE (mode);
16131 if (size == 0)
16133 /* In a void function we can use any argument register.
16134 In a function that returns a structure on the stack
16135 we can use the second and third argument registers. */
16136 if (mode == VOIDmode)
16137 regs_available_for_popping =
16138 (1 << ARG_REGISTER (1))
16139 | (1 << ARG_REGISTER (2))
16140 | (1 << ARG_REGISTER (3));
16141 else
16142 regs_available_for_popping =
16143 (1 << ARG_REGISTER (2))
16144 | (1 << ARG_REGISTER (3));
16146 else if (size <= 4)
16147 regs_available_for_popping =
16148 (1 << ARG_REGISTER (2))
16149 | (1 << ARG_REGISTER (3));
16150 else if (size <= 8)
16151 regs_available_for_popping =
16152 (1 << ARG_REGISTER (3));
16155 /* Match registers to be popped with registers into which we pop them. */
16156 for (available = regs_available_for_popping,
16157 required = regs_to_pop;
16158 required != 0 && available != 0;
16159 available &= ~(available & - available),
16160 required &= ~(required & - required))
16161 -- pops_needed;
16163 /* If we have any popping registers left over, remove them. */
16164 if (available > 0)
16165 regs_available_for_popping &= ~available;
16167 /* Otherwise if we need another popping register we can use
16168 the fourth argument register. */
16169 else if (pops_needed)
16171 /* If we have not found any free argument registers and
16172 reg a4 contains the return address, we must move it. */
16173 if (regs_available_for_popping == 0
16174 && reg_containing_return_addr == LAST_ARG_REGNUM)
16176 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16177 reg_containing_return_addr = LR_REGNUM;
16179 else if (size > 12)
16181 /* Register a4 is being used to hold part of the return value,
16182 but we have dire need of a free, low register. */
16183 restore_a4 = TRUE;
16185 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
16188 if (reg_containing_return_addr != LAST_ARG_REGNUM)
16190 /* The fourth argument register is available. */
16191 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
16193 --pops_needed;
16197 /* Pop as many registers as we can. */
16198 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16199 regs_available_for_popping);
16201 /* Process the registers we popped. */
16202 if (reg_containing_return_addr == -1)
16204 /* The return address was popped into the lowest numbered register. */
16205 regs_to_pop &= ~(1 << LR_REGNUM);
16207 reg_containing_return_addr =
16208 number_of_first_bit_set (regs_available_for_popping);
16210 /* Remove this register from the mask of available registers, so that
16211 the return address will not be corrupted by further pops. */
16212 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
16215 /* If we popped other registers then handle them here. */
16216 if (regs_available_for_popping)
16218 int frame_pointer;
16220 /* Work out which register currently contains the frame pointer. */
16221 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
16223 /* Move it into the correct place. */
16224 asm_fprintf (f, "\tmov\t%r, %r\n",
16225 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
16227 /* (Temporarily) remove it from the mask of popped registers. */
16228 regs_available_for_popping &= ~(1 << frame_pointer);
16229 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
16231 if (regs_available_for_popping)
16233 int stack_pointer;
16235 /* We popped the stack pointer as well;
16236 find the register that contains it. */
16237 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
16239 /* Move it into the stack register. */
16240 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
16242 /* At this point we have popped all necessary registers, so
16243 do not worry about restoring regs_available_for_popping
16244 to its correct value:
16246 assert (pops_needed == 0)
16247 assert (regs_available_for_popping == (1 << frame_pointer))
16248 assert (regs_to_pop == (1 << STACK_POINTER)) */
16250 else
16252 /* Since we have just moved the popped value into the frame
16253 pointer, the popping register is available for reuse, and
16254 we know that we still have the stack pointer left to pop. */
16255 regs_available_for_popping |= (1 << frame_pointer);
16259 /* If we still have registers left on the stack, but we no longer have
16260 any registers into which we can pop them, then we must move the return
16261 address into the link register and make available the register that
16262 contained it. */
16263 if (regs_available_for_popping == 0 && pops_needed > 0)
16265 regs_available_for_popping |= 1 << reg_containing_return_addr;
16267 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
16268 reg_containing_return_addr);
16270 reg_containing_return_addr = LR_REGNUM;
16273 /* If we have registers left on the stack then pop some more.
16274 We know that at most we will want to pop FP and SP. */
16275 if (pops_needed > 0)
16277 int popped_into;
16278 int move_to;
16280 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16281 regs_available_for_popping);
16283 /* We have popped either FP or SP.
16284 Move whichever one it is into the correct register. */
16285 popped_into = number_of_first_bit_set (regs_available_for_popping);
16286 move_to = number_of_first_bit_set (regs_to_pop);
16288 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
16290 regs_to_pop &= ~(1 << move_to);
16292 --pops_needed;
16295 /* If we still have not popped everything then we must have only
16296 had one register available to us and we are now popping the SP. */
16297 if (pops_needed > 0)
16299 int popped_into;
16301 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
16302 regs_available_for_popping);
16304 popped_into = number_of_first_bit_set (regs_available_for_popping);
16306 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
16308 assert (regs_to_pop == (1 << STACK_POINTER))
16309 assert (pops_needed == 1)
16313 /* If necessary restore the a4 register. */
16314 if (restore_a4)
16316 if (reg_containing_return_addr != LR_REGNUM)
16318 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
16319 reg_containing_return_addr = LR_REGNUM;
16322 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
16325 if (current_function_calls_eh_return)
16326 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
16328 /* Return to caller. */
16329 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
16333 void
16334 thumb1_final_prescan_insn (rtx insn)
16336 if (flag_print_asm_name)
16337 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
16338 INSN_ADDRESSES (INSN_UID (insn)));
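/* Return 1 if the nonzero constant VAL has all of its set bits inside a
   single 8-bit field shifted left by 0 to 24 bits, i.e. VAL could be
   synthesized by shifting a byte-sized constant; e.g. 0x00FF0000
   qualifies, 0x00010001 does not.  */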
16342 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
16344 unsigned HOST_WIDE_INT mask = 0xff;
16345 int i;
16347 if (val == 0) /* XXX */
16348 return 0;
16350 for (i = 0; i < 25; i++)
16351 if ((val & (mask << i)) == val)
16352 return 1;
16354 return 0;
16357 /* Returns nonzero if the current function contains,
16358 or might contain, a far jump. */
16359 static int
16360 thumb_far_jump_used_p (void)
16362 rtx insn;
16364 /* This test is only important for leaf functions. */
16365 /* assert (!leaf_function_p ()); */
16367 /* If we have already decided that far jumps may be used,
16368 do not bother checking again, and always return true even if
16369 it turns out that they are not being used. Once we have made
16370 the decision that far jumps are present (and that hence the link
16371 register will be pushed onto the stack) we cannot go back on it. */
16372 if (cfun->machine->far_jump_used)
16373 return 1;
16375 /* If this function is not being called from the prologue/epilogue
16376 generation code then it must be being called from the
16377 INITIAL_ELIMINATION_OFFSET macro. */
16378 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
16380 /* In this case we know that we are being asked about the elimination
16381 of the arg pointer register. If that register is not being used,
16382 then there are no arguments on the stack, and we do not have to
16383 worry that a far jump might force the prologue to push the link
16384 register, changing the stack offsets. In this case we can just
16385 return false, since the presence of far jumps in the function will
16386 not affect stack offsets.
16388 If the arg pointer is live (or if it was live, but has now been
16389 eliminated and so set to dead) then we do have to test to see if
16390 the function might contain a far jump. This test can lead to some
16391 false positives, since before reload is completed, the length of
16392 branch instructions is not known, so gcc defaults to returning their
16393 longest length, which in turn sets the far jump attribute to true.
16395 A false positive will not result in bad code being generated, but it
16396 will result in a needless push and pop of the link register. We
16397 hope that this does not occur too often.
16399 If we need doubleword stack alignment this could affect the other
16400 elimination offsets so we can't risk getting it wrong. */
16401 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
16402 cfun->machine->arg_pointer_live = 1;
16403 else if (!cfun->machine->arg_pointer_live)
16404 return 0;
16407 /* Check to see if the function contains a branch
16408 insn with the far jump attribute set. */
16409 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
16411 if (GET_CODE (insn) == JUMP_INSN
16412 /* Ignore tablejump patterns. */
16413 && GET_CODE (PATTERN (insn)) != ADDR_VEC
16414 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
16415 && get_attr_far_jump (insn) == FAR_JUMP_YES
16418 /* Record the fact that we have decided that
16419 the function does use far jumps. */
16420 cfun->machine->far_jump_used = 1;
16421 return 1;
16425 return 0;
16428 /* Return nonzero if FUNC must be entered in ARM mode. */
16430 is_called_in_ARM_mode (tree func)
16432 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
16434 /* Ignore the problem of functions whose address is taken. */
16435 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
16436 return TRUE;
16438 #ifdef ARM_PE
16439 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
16440 #else
16441 return FALSE;
16442 #endif
16445 /* The bits which aren't usefully expanded as rtl. */
16446 const char *
16447 thumb_unexpanded_epilogue (void)
16449 int regno;
16450 unsigned long live_regs_mask = 0;
16451 int high_regs_pushed = 0;
16452 int had_to_push_lr;
16453 int size;
16455 if (return_used_this_function)
16456 return "";
16458 if (IS_NAKED (arm_current_func_type ()))
16459 return "";
16461 live_regs_mask = thumb1_compute_save_reg_mask ();
16462 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16464 /* We can deduce the registers used from the function's return value.
16465 This is more reliable than examining df_regs_ever_live_p () because that
16466 will be set if the register is ever used in the function, not just if
16467 the register is used to hold a return value. */
16468 size = arm_size_return_regs ();
16470 /* The prolog may have pushed some high registers to use as
16471 work registers. e.g. the testsuite file:
16472 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
16473 compiles to produce:
16474 push {r4, r5, r6, r7, lr}
16475 mov r7, r9
16476 mov r6, r8
16477 push {r6, r7}
16478 as part of the prolog. We have to undo that pushing here. */
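/* The undo emitted below is the mirror image of that: pop the saved
   values into whichever low registers are free (r2/r3 and/or live
   call-saved low registers, depending on the return-value size) and
   move them back up, e.g. roughly
	pop	{r2, r3}
	mov	r8, r2
	mov	r9, r3
   before the remaining low registers and the return address are
   popped.  */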
16480 if (high_regs_pushed)
16482 unsigned long mask = live_regs_mask & 0xff;
16483 int next_hi_reg;
16485 /* The available low registers depend on the size of the value we are
16486 returning. */
16487 if (size <= 12)
16488 mask |= 1 << 3;
16489 if (size <= 8)
16490 mask |= 1 << 2;
16492 if (mask == 0)
16493 /* Oh dear! We have no low registers into which we can pop
16494 high registers! */
16495 internal_error
16496 ("no low registers available for popping high registers");
16498 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
16499 if (live_regs_mask & (1 << next_hi_reg))
16500 break;
16502 while (high_regs_pushed)
16504 /* Find lo register(s) into which the high register(s) can
16505 be popped. */
16506 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16508 if (mask & (1 << regno))
16509 high_regs_pushed--;
16510 if (high_regs_pushed == 0)
16511 break;
16514 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
16516 /* Pop the values into the low register(s). */
16517 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
16519 /* Move the value(s) into the high registers. */
16520 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
16522 if (mask & (1 << regno))
16524 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
16525 regno);
16527 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
16528 if (live_regs_mask & (1 << next_hi_reg))
16529 break;
16533 live_regs_mask &= ~0x0f00;
16536 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
16537 live_regs_mask &= 0xff;
16539 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
16541 /* Pop the return address into the PC. */
16542 if (had_to_push_lr)
16543 live_regs_mask |= 1 << PC_REGNUM;
16545 /* Either no argument registers were pushed or a backtrace
16546 structure was created which includes an adjusted stack
16547 pointer, so just pop everything. */
16548 if (live_regs_mask)
16549 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16550 live_regs_mask);
16552 /* We have either just popped the return address into the
16553 PC or it was kept in LR for the entire function. */
16554 if (!had_to_push_lr)
16555 thumb_exit (asm_out_file, LR_REGNUM);
16557 else
16559 /* Pop everything but the return address. */
16560 if (live_regs_mask)
16561 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
16562 live_regs_mask);
16564 if (had_to_push_lr)
16566 if (size > 12)
16568 /* We have no free low regs, so save one. */
16569 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
16570 LAST_ARG_REGNUM);
16573 /* Get the return address into a temporary register. */
16574 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
16575 1 << LAST_ARG_REGNUM);
16577 if (size > 12)
16579 /* Move the return address to lr. */
16580 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
16581 LAST_ARG_REGNUM);
16582 /* Restore the low register. */
16583 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
16584 IP_REGNUM);
16585 regno = LR_REGNUM;
16587 else
16588 regno = LAST_ARG_REGNUM;
16590 else
16591 regno = LR_REGNUM;
16593 /* Remove the argument registers that were pushed onto the stack. */
16594 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
16595 SP_REGNUM, SP_REGNUM,
16596 current_function_pretend_args_size);
16598 thumb_exit (asm_out_file, regno);
16601 return "";
16604 /* Functions to save and restore machine-specific function data. */
16605 static struct machine_function *
16606 arm_init_machine_status (void)
16608 struct machine_function *machine;
16609 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
16611 #if ARM_FT_UNKNOWN != 0
16612 machine->func_type = ARM_FT_UNKNOWN;
16613 #endif
16614 return machine;
16617 /* Return an RTX indicating where the return address to the
16618 calling function can be found. */
16620 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
16622 if (count != 0)
16623 return NULL_RTX;
16625 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
16628 /* Do anything needed before RTL is emitted for each function. */
16629 void
16630 arm_init_expanders (void)
16632 /* Arrange to initialize and mark the machine per-function status. */
16633 init_machine_status = arm_init_machine_status;
16635 /* This is to stop the combine pass optimizing away the alignment
16636 adjustment of va_arg. */
16637 /* ??? It is claimed that this should not be necessary. */
16638 if (cfun)
16639 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
16643 /* Like arm_compute_initial_elimination_offset. Simpler because there
16644 isn't an ABI specified frame pointer for Thumb. Instead, we set it
16645 to point at the base of the local variables after static stack
16646 space for a function has been allocated. */
16648 HOST_WIDE_INT
16649 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
16651 arm_stack_offsets *offsets;
16653 offsets = arm_get_frame_offsets ();
16655 switch (from)
16657 case ARG_POINTER_REGNUM:
16658 switch (to)
16660 case STACK_POINTER_REGNUM:
16661 return offsets->outgoing_args - offsets->saved_args;
16663 case FRAME_POINTER_REGNUM:
16664 return offsets->soft_frame - offsets->saved_args;
16666 case ARM_HARD_FRAME_POINTER_REGNUM:
16667 return offsets->saved_regs - offsets->saved_args;
16669 case THUMB_HARD_FRAME_POINTER_REGNUM:
16670 return offsets->locals_base - offsets->saved_args;
16672 default:
16673 gcc_unreachable ();
16675 break;
16677 case FRAME_POINTER_REGNUM:
16678 switch (to)
16680 case STACK_POINTER_REGNUM:
16681 return offsets->outgoing_args - offsets->soft_frame;
16683 case ARM_HARD_FRAME_POINTER_REGNUM:
16684 return offsets->saved_regs - offsets->soft_frame;
16686 case THUMB_HARD_FRAME_POINTER_REGNUM:
16687 return offsets->locals_base - offsets->soft_frame;
16689 default:
16690 gcc_unreachable ();
16692 break;
16694 default:
16695 gcc_unreachable ();
16699 /* Generate the rest of a function's prologue. */
16700 void
16701 thumb1_expand_prologue (void)
16703 rtx insn, dwarf;
16705 HOST_WIDE_INT amount;
16706 arm_stack_offsets *offsets;
16707 unsigned long func_type;
16708 int regno;
16709 unsigned long live_regs_mask;
16711 func_type = arm_current_func_type ();
16713 /* Naked functions don't have prologues. */
16714 if (IS_NAKED (func_type))
16715 return;
16717 if (IS_INTERRUPT (func_type))
16719 error ("interrupt Service Routines cannot be coded in Thumb mode");
16720 return;
16723 live_regs_mask = thumb1_compute_save_reg_mask ();
16724 /* Load the pic register before setting the frame pointer,
16725 so we can use r7 as a temporary work register. */
16726 if (flag_pic && arm_pic_register != INVALID_REGNUM)
16727 arm_load_pic_register (live_regs_mask);
16729 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
16730 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
16731 stack_pointer_rtx);
16733 offsets = arm_get_frame_offsets ();
16734 amount = offsets->outgoing_args - offsets->saved_regs;
16735 if (amount)
16737 if (amount < 512)
16739 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16740 GEN_INT (- amount)));
16741 RTX_FRAME_RELATED_P (insn) = 1;
16743 else
16745 rtx reg;
16747 /* The stack decrement is too big for an immediate value in a single
16748 insn. In theory we could issue multiple subtracts, but after
16749 three of them it becomes more space efficient to place the full
16750 value in the constant pool and load into a register. (Also the
16751 ARM debugger really likes to see only one stack decrement per
16752 function). So instead we look for a scratch register into which
16753 we can load the decrement, and then we subtract this from the
16754 stack pointer. Unfortunately on the thumb the only available
16755 scratch registers are the argument registers, and we cannot use
16756 these as they may hold arguments to the function. Instead we
16757 attempt to locate a call preserved register which is used by this
16758 function. If we can find one, then we know that it will have
16759 been pushed at the start of the prologue and so we can corrupt
16760 it now. */
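/* In effect the fallback sequence below is (with rN standing for the
   chosen scratch register, a name used here only for illustration):
	ldr	rN, =-<amount>	(gen_movsi; loaded from the literal pool)
	add	sp, sp, rN
   where rN is a saved call-preserved low register if one exists, or a
   low register temporarily spilled to IP otherwise.  */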
16761 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
16762 if (live_regs_mask & (1 << regno)
16763 && !(frame_pointer_needed
16764 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
16765 break;
16767 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
16769 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
16771 /* Choose an arbitrary, non-argument low register. */
16772 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
16774 /* Save it by copying it into a high, scratch register. */
16775 emit_insn (gen_movsi (spare, reg));
16776 /* Add a USE to stop propagate_one_insn() from barfing. */
16777 emit_insn (gen_prologue_use (spare));
16779 /* Decrement the stack. */
16780 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16781 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16782 stack_pointer_rtx, reg));
16783 RTX_FRAME_RELATED_P (insn) = 1;
16784 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16785 plus_constant (stack_pointer_rtx,
16786 -amount));
16787 RTX_FRAME_RELATED_P (dwarf) = 1;
16788 REG_NOTES (insn)
16789 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16790 REG_NOTES (insn));
16792 /* Restore the low register's original value. */
16793 emit_insn (gen_movsi (reg, spare));
16795 /* Emit a USE of the restored scratch register, so that flow
16796 analysis will not consider the restore redundant. The
16797 register won't be used again in this function and isn't
16798 restored by the epilogue. */
16799 emit_insn (gen_prologue_use (reg));
16801 else
16803 reg = gen_rtx_REG (SImode, regno);
16805 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
16807 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
16808 stack_pointer_rtx, reg));
16809 RTX_FRAME_RELATED_P (insn) = 1;
16810 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
16811 plus_constant (stack_pointer_rtx,
16812 -amount));
16813 RTX_FRAME_RELATED_P (dwarf) = 1;
16814 REG_NOTES (insn)
16815 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
16816 REG_NOTES (insn));
16821 if (frame_pointer_needed)
16822 thumb_set_frame_pointer (offsets);
16824 /* If we are profiling, make sure no instructions are scheduled before
16825 the call to mcount. Similarly if the user has requested no
16826 scheduling in the prolog. Similarly if we want non-call exceptions
16827 using the EABI unwinder, to prevent faulting instructions from being
16828 swapped with a stack adjustment. */
16829 if (current_function_profile || !TARGET_SCHED_PROLOG
16830 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions))
16831 emit_insn (gen_blockage ());
16833 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
16834 if (live_regs_mask & 0xff)
16835 cfun->machine->lr_save_eliminated = 0;
16839 void
16840 thumb1_expand_epilogue (void)
16842 HOST_WIDE_INT amount;
16843 arm_stack_offsets *offsets;
16844 int regno;
16846 /* Naked functions don't have epilogues. */
16847 if (IS_NAKED (arm_current_func_type ()))
16848 return;
16850 offsets = arm_get_frame_offsets ();
16851 amount = offsets->outgoing_args - offsets->saved_regs;
16853 if (frame_pointer_needed)
16855 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
16856 amount = offsets->locals_base - offsets->saved_regs;
16859 gcc_assert (amount >= 0);
16860 if (amount)
16862 if (amount < 512)
16863 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
16864 GEN_INT (amount)));
16865 else
16867 /* r3 is always free in the epilogue. */
16868 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
16870 emit_insn (gen_movsi (reg, GEN_INT (amount)));
16871 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
16875 /* Emit a USE (stack_pointer_rtx), so that
16876 the stack adjustment will not be deleted. */
16877 emit_insn (gen_prologue_use (stack_pointer_rtx));
16879 if (current_function_profile || !TARGET_SCHED_PROLOG)
16880 emit_insn (gen_blockage ());
16882 /* Emit a clobber for each register that will be restored in the epilogue,
16883 so that flow2 will get register lifetimes correct. */
16884 for (regno = 0; regno < 13; regno++)
16885 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
16886 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
16888 if (! df_regs_ever_live_p (LR_REGNUM))
16889 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
16892 static void
16893 thumb1_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
16895 unsigned long live_regs_mask = 0;
16896 unsigned long l_mask;
16897 unsigned high_regs_pushed = 0;
16898 int cfa_offset = 0;
16899 int regno;
16901 if (IS_NAKED (arm_current_func_type ()))
16902 return;
16904 if (is_called_in_ARM_mode (current_function_decl))
16906 const char * name;
16908 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
16909 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
16910 == SYMBOL_REF);
16911 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
16913 /* Generate code sequence to switch us into Thumb mode. */
16914 /* The .code 32 directive has already been emitted by
16915 ASM_DECLARE_FUNCTION_NAME. */
16916 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
16917 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
16919 /* Generate a label, so that the debugger will notice the
16920 change in instruction sets. This label is also used by
16921 the assembler to bypass the ARM code when this function
16922 is called from a Thumb encoded function elsewhere in the
16923 same file. Hence the definition of STUB_NAME here must
16924 agree with the definition in gas/config/tc-arm.c. */
16926 #define STUB_NAME ".real_start_of"
16928 fprintf (f, "\t.code\t16\n");
16929 #ifdef ARM_PE
16930 if (arm_dllexport_name_p (name))
16931 name = arm_strip_name_encoding (name);
16932 #endif
16933 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
16934 fprintf (f, "\t.thumb_func\n");
16935 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
16938 if (current_function_pretend_args_size)
16940 /* Output unwind directive for the stack adjustment. */
16941 if (ARM_EABI_UNWIND_TABLES)
16942 fprintf (f, "\t.pad #%d\n",
16943 current_function_pretend_args_size);
16945 if (cfun->machine->uses_anonymous_args)
16947 int num_pushes;
16949 fprintf (f, "\tpush\t{");
16951 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
16953 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
16954 regno <= LAST_ARG_REGNUM;
16955 regno++)
16956 asm_fprintf (f, "%r%s", regno,
16957 regno == LAST_ARG_REGNUM ? "" : ", ");
16959 fprintf (f, "}\n");
16961 else
16962 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
16963 SP_REGNUM, SP_REGNUM,
16964 current_function_pretend_args_size);
16966 /* We don't need to record the stores for unwinding (would it
16967 help the debugger any if we did?), but record the change in
16968 the stack pointer. */
16969 if (dwarf2out_do_frame ())
16971 char *l = dwarf2out_cfi_label ();
16973 cfa_offset = cfa_offset + current_function_pretend_args_size;
16974 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
16978 /* Get the registers we are going to push. */
16979 live_regs_mask = thumb1_compute_save_reg_mask ();
16980 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
16981 l_mask = live_regs_mask & 0x40ff;
16982 /* Then count how many other high registers will need to be pushed. */
16983 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
16985 if (TARGET_BACKTRACE)
16987 unsigned offset;
16988 unsigned work_register;
16990 /* We have been asked to create a stack backtrace structure.
16991 The code looks like this:
16993 0 .align 2
16994 0 func:
16995 0 sub SP, #16 Reserve space for 4 registers.
16996 2 push {R7} Push low registers.
16997 4 add R7, SP, #20 Get the stack pointer before the push.
16998 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
16999 8 mov R7, PC Get hold of the start of this code plus 12.
17000 10 str R7, [SP, #16] Store it.
17001 12 mov R7, FP Get hold of the current frame pointer.
17002 14 str R7, [SP, #4] Store it.
17003 16 mov R7, LR Get hold of the current return address.
17004 18 str R7, [SP, #12] Store it.
17005 20 add R7, SP, #16 Point at the start of the backtrace structure.
17006 22 mov FP, R7 Put this value into the frame pointer. */
17008 work_register = thumb_find_work_register (live_regs_mask);
17010 if (ARM_EABI_UNWIND_TABLES)
17011 asm_fprintf (f, "\t.pad #16\n");
17013 asm_fprintf
17014 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
17015 SP_REGNUM, SP_REGNUM);
17017 if (dwarf2out_do_frame ())
17019 char *l = dwarf2out_cfi_label ();
17021 cfa_offset = cfa_offset + 16;
17022 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
17025 if (l_mask)
17027 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17028 offset = bit_count (l_mask) * UNITS_PER_WORD;
17030 else
17031 offset = 0;
17033 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17034 offset + 16 + current_function_pretend_args_size);
17036 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17037 offset + 4);
17039 /* Make sure that the instruction fetching the PC is in the right place
17040 to calculate "start of backtrace creation code + 12". */
17041 if (l_mask)
17043 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17044 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17045 offset + 12);
17046 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17047 ARM_HARD_FRAME_POINTER_REGNUM);
17048 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17049 offset);
17051 else
17053 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
17054 ARM_HARD_FRAME_POINTER_REGNUM);
17055 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17056 offset);
17057 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
17058 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17059 offset + 12);
17062 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
17063 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
17064 offset + 8);
17065 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
17066 offset + 12);
17067 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
17068 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
17070 /* Optimization: If we are not pushing any low registers but we are going
17071 to push some high registers then delay our first push. This will just
17072 be a push of LR and we can combine it with the push of the first high
17073 register. */
17074 else if ((l_mask & 0xff) != 0
17075 || (high_regs_pushed == 0 && l_mask))
17076 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
17078 if (high_regs_pushed)
17080 unsigned pushable_regs;
17081 unsigned next_hi_reg;
17083 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
17084 if (live_regs_mask & (1 << next_hi_reg))
17085 break;
17087 pushable_regs = l_mask & 0xff;
17089 if (pushable_regs == 0)
17090 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
17092 while (high_regs_pushed > 0)
17094 unsigned long real_regs_mask = 0;
17096 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
17098 if (pushable_regs & (1 << regno))
17100 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
17102 high_regs_pushed --;
17103 real_regs_mask |= (1 << next_hi_reg);
17105 if (high_regs_pushed)
17107 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
17108 next_hi_reg --)
17109 if (live_regs_mask & (1 << next_hi_reg))
17110 break;
17112 else
17114 pushable_regs &= ~((1 << regno) - 1);
17115 break;
17120 /* If we had to find a work register and we have not yet
17121 saved the LR then add it to the list of regs to push. */
17122 if (l_mask == (1 << LR_REGNUM))
17124 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
17125 1, &cfa_offset,
17126 real_regs_mask | (1 << LR_REGNUM));
17127 l_mask = 0;
17129 else
17130 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
17135 /* Handle the case of a double word load into a low register from
17136 a computed memory address. The computed address may involve a
17137 register which is overwritten by the load. */
17138 const char *
17139 thumb_load_double_from_address (rtx *operands)
17141 rtx addr;
17142 rtx base;
17143 rtx offset;
17144 rtx arg1;
17145 rtx arg2;
17147 gcc_assert (GET_CODE (operands[0]) == REG);
17148 gcc_assert (GET_CODE (operands[1]) == MEM);
17150 /* Get the memory address. */
17151 addr = XEXP (operands[1], 0);
17153 /* Work out how the memory address is computed. */
17154 switch (GET_CODE (addr))
17156 case REG:
17157 operands[2] = adjust_address (operands[1], SImode, 4);
17159 if (REGNO (operands[0]) == REGNO (addr))
17161 output_asm_insn ("ldr\t%H0, %2", operands);
17162 output_asm_insn ("ldr\t%0, %1", operands);
17164 else
17166 output_asm_insn ("ldr\t%0, %1", operands);
17167 output_asm_insn ("ldr\t%H0, %2", operands);
17169 break;
17171 case CONST:
17172 /* Compute <address> + 4 for the high order load. */
17173 operands[2] = adjust_address (operands[1], SImode, 4);
17175 output_asm_insn ("ldr\t%0, %1", operands);
17176 output_asm_insn ("ldr\t%H0, %2", operands);
17177 break;
17179 case PLUS:
17180 arg1 = XEXP (addr, 0);
17181 arg2 = XEXP (addr, 1);
17183 if (CONSTANT_P (arg1))
17184 base = arg2, offset = arg1;
17185 else
17186 base = arg1, offset = arg2;
17188 gcc_assert (GET_CODE (base) == REG);
17190 /* Catch the case of <address> = <reg> + <reg> */
17191 if (GET_CODE (offset) == REG)
17193 int reg_offset = REGNO (offset);
17194 int reg_base = REGNO (base);
17195 int reg_dest = REGNO (operands[0]);
17197 /* Add the base and offset registers together into the
17198 higher destination register. */
17199 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
17200 reg_dest + 1, reg_base, reg_offset);
17202 /* Load the lower destination register from the address in
17203 the higher destination register. */
17204 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
17205 reg_dest, reg_dest + 1);
17207 /* Load the higher destination register from its own address
17208 plus 4. */
17209 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
17210 reg_dest + 1, reg_dest + 1);
17212 else
17214 /* Compute <address> + 4 for the high order load. */
17215 operands[2] = adjust_address (operands[1], SImode, 4);
17217 /* If the computed address is held in the low order register
17218 then load the high order register first, otherwise always
17219 load the low order register first. */
17220 if (REGNO (operands[0]) == REGNO (base))
17222 output_asm_insn ("ldr\t%H0, %2", operands);
17223 output_asm_insn ("ldr\t%0, %1", operands);
17225 else
17227 output_asm_insn ("ldr\t%0, %1", operands);
17228 output_asm_insn ("ldr\t%H0, %2", operands);
17231 break;
17233 case LABEL_REF:
17234 /* With no registers to worry about we can just load the value
17235 directly. */
17236 operands[2] = adjust_address (operands[1], SImode, 4);
17238 output_asm_insn ("ldr\t%H0, %2", operands);
17239 output_asm_insn ("ldr\t%0, %1", operands);
17240 break;
17242 default:
17243 gcc_unreachable ();
17246 return "";
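/* Output a Thumb ldmia/stmia pair that copies N (2 or 3) words; operands
   4 to 6 hold the scratch registers, which are sorted into ascending
   order below so that the resulting register lists are valid.  */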
17249 const char *
17250 thumb_output_move_mem_multiple (int n, rtx *operands)
17252 rtx tmp;
17254 switch (n)
17256 case 2:
17257 if (REGNO (operands[4]) > REGNO (operands[5]))
17259 tmp = operands[4];
17260 operands[4] = operands[5];
17261 operands[5] = tmp;
17263 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
17264 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
17265 break;
17267 case 3:
17268 if (REGNO (operands[4]) > REGNO (operands[5]))
17270 tmp = operands[4];
17271 operands[4] = operands[5];
17272 operands[5] = tmp;
17274 if (REGNO (operands[5]) > REGNO (operands[6]))
17276 tmp = operands[5];
17277 operands[5] = operands[6];
17278 operands[6] = tmp;
17280 if (REGNO (operands[4]) > REGNO (operands[5]))
17282 tmp = operands[4];
17283 operands[4] = operands[5];
17284 operands[5] = tmp;
17287 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
17288 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
17289 break;
17291 default:
17292 gcc_unreachable ();
17295 return "";
17298 /* Output a call-via instruction for thumb state. */
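/* For example, a Thumb-state call through r3 is emitted here as
   "bl <label>", and the matching "<label>: bx r3" stub is emitted later
   (by arm_file_end below for the shared text-section case); <label>
   stands for a compiler-generated internal label.  Illustrative sketch
   only.  */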
17299 const char *
17300 thumb_call_via_reg (rtx reg)
17302 int regno = REGNO (reg);
17303 rtx *labelp;
17305 gcc_assert (regno < LR_REGNUM);
17307 /* If we are in the normal text section we can use a single instance
17308 per compilation unit. If we are doing function sections, then we need
17309 an entry per section, since we can't rely on reachability. */
17310 if (in_section == text_section)
17312 thumb_call_reg_needed = 1;
17314 if (thumb_call_via_label[regno] == NULL)
17315 thumb_call_via_label[regno] = gen_label_rtx ();
17316 labelp = thumb_call_via_label + regno;
17318 else
17320 if (cfun->machine->call_via[regno] == NULL)
17321 cfun->machine->call_via[regno] = gen_label_rtx ();
17322 labelp = cfun->machine->call_via + regno;
17325 output_asm_insn ("bl\t%a0", labelp);
17326 return "";
17329 /* Routines for generating rtl. */
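/* thumb_expand_movmemqi below splits a constant-length block copy into
   12-byte and 8-byte multi-word moves (the movmem12b/movmem8b patterns)
   followed by word, half-word and byte moves; e.g. a 23-byte copy is
   expanded as 12 + 8 + 2 + 1 bytes.  Illustrative decomposition only.  */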
17330 void
17331 thumb_expand_movmemqi (rtx *operands)
17333 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
17334 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
17335 HOST_WIDE_INT len = INTVAL (operands[2]);
17336 HOST_WIDE_INT offset = 0;
17338 while (len >= 12)
17340 emit_insn (gen_movmem12b (out, in, out, in));
17341 len -= 12;
17344 if (len >= 8)
17346 emit_insn (gen_movmem8b (out, in, out, in));
17347 len -= 8;
17350 if (len >= 4)
17352 rtx reg = gen_reg_rtx (SImode);
17353 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
17354 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
17355 len -= 4;
17356 offset += 4;
17359 if (len >= 2)
17361 rtx reg = gen_reg_rtx (HImode);
17362 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
17363 plus_constant (in, offset))));
17364 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
17365 reg));
17366 len -= 2;
17367 offset += 2;
17370 if (len)
17372 rtx reg = gen_reg_rtx (QImode);
17373 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
17374 plus_constant (in, offset))));
17375 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
17376 reg));
17380 void
17381 thumb_reload_out_hi (rtx *operands)
17383 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
17386 /* Handle reading a half-word from memory during reload. */
17387 void
17388 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
17390 gcc_unreachable ();
17393 /* Return the length of a function name prefix
17394 that starts with the character 'c'. */
17395 static int
17396 arm_get_strip_length (int c)
17398 switch (c)
17400 ARM_NAME_ENCODING_LENGTHS
17401 default: return 0;
17405 /* Return a pointer to a function's name with any
17406 and all prefix encodings stripped from it. */
17407 const char *
17408 arm_strip_name_encoding (const char *name)
17410 int skip;
17412 while ((skip = arm_get_strip_length (* name)))
17413 name += skip;
17415 return name;
17418 /* If there is a '*' anywhere in the name's prefix, then
17419 emit the stripped name verbatim, otherwise prepend an
17420 underscore if leading underscores are being used. */
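/* For instance, a name encoded as "*foo" is printed verbatim as "foo",
   while an unadorned "foo" is printed through %U and so may gain the
   target's user label prefix, e.g. "_foo".  Illustrative only.  */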
17421 void
17422 arm_asm_output_labelref (FILE *stream, const char *name)
17424 int skip;
17425 int verbatim = 0;
17427 while ((skip = arm_get_strip_length (* name)))
17429 verbatim |= (*name == '*');
17430 name += skip;
17433 if (verbatim)
17434 fputs (name, stream);
17435 else
17436 asm_fprintf (stream, "%U%s", name);
17439 static void
17440 arm_file_start (void)
17442 int val;
17444 if (TARGET_UNIFIED_ASM)
17445 asm_fprintf (asm_out_file, "\t.syntax unified\n");
17447 if (TARGET_BPABI)
17449 const char *fpu_name;
17450 if (arm_select[0].string)
17451 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string);
17452 else if (arm_select[1].string)
17453 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
17454 else
17455 asm_fprintf (asm_out_file, "\t.cpu %s\n",
17456 all_cores[arm_default_cpu].name);
17458 if (TARGET_SOFT_FLOAT)
17460 if (TARGET_VFP)
17461 fpu_name = "softvfp";
17462 else
17463 fpu_name = "softfpa";
17465 else
17467 int set_float_abi_attributes = 0;
17468 switch (arm_fpu_arch)
17470 case FPUTYPE_FPA:
17471 fpu_name = "fpa";
17472 break;
17473 case FPUTYPE_FPA_EMU2:
17474 fpu_name = "fpe2";
17475 break;
17476 case FPUTYPE_FPA_EMU3:
17477 fpu_name = "fpe3";
17478 break;
17479 case FPUTYPE_MAVERICK:
17480 fpu_name = "maverick";
17481 break;
17482 case FPUTYPE_VFP:
17483 fpu_name = "vfp";
17484 set_float_abi_attributes = 1;
17485 break;
17486 case FPUTYPE_VFP3:
17487 fpu_name = "vfp3";
17488 set_float_abi_attributes = 1;
17489 break;
17490 case FPUTYPE_NEON:
17491 fpu_name = "neon";
17492 set_float_abi_attributes = 1;
17493 break;
17494 default:
17495 abort();
17497 if (set_float_abi_attributes)
17499 if (TARGET_HARD_FLOAT)
17500 asm_fprintf (asm_out_file, "\t.eabi_attribute 27, 3\n");
17501 if (TARGET_HARD_FLOAT_ABI)
17502 asm_fprintf (asm_out_file, "\t.eabi_attribute 28, 1\n");
17505 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
17507 /* Some of these attributes only apply when the corresponding features
17508 are used. However, we don't have any easy way of figuring this out.
17509 Conservatively record the setting that would have been used. */
17511 /* Tag_ABI_PCS_wchar_t. */
17512 asm_fprintf (asm_out_file, "\t.eabi_attribute 18, %d\n",
17513 (int)WCHAR_TYPE_SIZE / BITS_PER_UNIT);
17515 /* Tag_ABI_FP_rounding. */
17516 if (flag_rounding_math)
17517 asm_fprintf (asm_out_file, "\t.eabi_attribute 19, 1\n");
17518 if (!flag_unsafe_math_optimizations)
17520 /* Tag_ABI_FP_denormal. */
17521 asm_fprintf (asm_out_file, "\t.eabi_attribute 20, 1\n");
17522 /* Tag_ABI_FP_exceptions. */
17523 asm_fprintf (asm_out_file, "\t.eabi_attribute 21, 1\n");
17525 /* Tag_ABI_FP_user_exceptions. */
17526 if (flag_signaling_nans)
17527 asm_fprintf (asm_out_file, "\t.eabi_attribute 22, 1\n");
17528 /* Tag_ABI_FP_number_model. */
17529 asm_fprintf (asm_out_file, "\t.eabi_attribute 23, %d\n",
17530 flag_finite_math_only ? 1 : 3);
17532 /* Tag_ABI_align8_needed. */
17533 asm_fprintf (asm_out_file, "\t.eabi_attribute 24, 1\n");
17534 /* Tag_ABI_align8_preserved. */
17535 asm_fprintf (asm_out_file, "\t.eabi_attribute 25, 1\n");
17536 /* Tag_ABI_enum_size. */
17537 asm_fprintf (asm_out_file, "\t.eabi_attribute 26, %d\n",
17538 flag_short_enums ? 1 : 2);
17540 /* Tag_ABI_optimization_goals. */
17541 if (optimize_size)
17542 val = 4;
17543 else if (optimize >= 2)
17544 val = 2;
17545 else if (optimize)
17546 val = 1;
17547 else
17548 val = 6;
17549 asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val);
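/* For example, compiling with -Os therefore records
   ".eabi_attribute 30, 4" (illustrative of the mapping above).  */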
17551 default_file_start();
17554 static void
17555 arm_file_end (void)
17557 int regno;
17559 if (NEED_INDICATE_EXEC_STACK)
17560 /* Add .note.GNU-stack. */
17561 file_end_indicate_exec_stack ();
17563 if (! thumb_call_reg_needed)
17564 return;
17566 switch_to_section (text_section);
17567 asm_fprintf (asm_out_file, "\t.code 16\n");
17568 ASM_OUTPUT_ALIGN (asm_out_file, 1);
17570 for (regno = 0; regno < LR_REGNUM; regno++)
17572 rtx label = thumb_call_via_label[regno];
17574 if (label != 0)
17576 targetm.asm_out.internal_label (asm_out_file, "L",
17577 CODE_LABEL_NUMBER (label));
17578 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
17583 #ifndef ARM_PE
17584 /* Symbols in the text segment can be accessed without indirecting via the
17585 constant pool; it may take an extra binary operation, but this is still
17586 faster than indirecting via memory. Don't do this when not optimizing,
17587 since we won't be calculating all of the offsets necessary to do this
17588 simplification. */
17590 static void
17591 arm_encode_section_info (tree decl, rtx rtl, int first)
17593 if (optimize > 0 && TREE_CONSTANT (decl))
17594 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
17596 default_encode_section_info (decl, rtl, first);
17598 #endif /* !ARM_PE */
17600 static void
17601 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
17603 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
17604 && !strcmp (prefix, "L"))
17606 arm_ccfsm_state = 0;
17607 arm_target_insn = NULL;
17609 default_internal_label (stream, prefix, labelno);
17612 /* Output code to add DELTA to the first argument, and then jump
17613 to FUNCTION. Used for C++ multiple inheritance. */
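/* As an illustrative example, a thunk with DELTA == 4 for a function that
   does not return its result in memory adjusts the "this" pointer with
   "add r0, r0, #4" and then branches to the target with "b <function>"
   (ARM/Thumb-2 case; the Thumb-1 case below goes through r12).  */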
17614 static void
17615 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
17616 HOST_WIDE_INT delta,
17617 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
17618 tree function)
17620 static int thunk_label = 0;
17621 char label[256];
17622 char labelpc[256];
17623 int mi_delta = delta;
17624 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
17625 int shift = 0;
17626 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
17627 ? 1 : 0);
17628 if (mi_delta < 0)
17629 mi_delta = - mi_delta;
17630 /* When generating 16-bit thumb code, thunks are entered in arm mode. */
17631 if (TARGET_THUMB1)
17633 int labelno = thunk_label++;
17634 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
17635 fputs ("\tldr\tr12, ", file);
17636 assemble_name (file, label);
17637 fputc ('\n', file);
17638 if (flag_pic)
17640 /* If we are generating PIC, the ldr instruction below loads
17641 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
17642 the address of the add + 8, so we have:
17644 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
17645 = target + 1.
17647 Note that we have "+ 1" because some versions of GNU ld
17648 don't set the low bit of the result for R_ARM_REL32
17649 relocations against thumb function symbols. */
17650 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
17651 assemble_name (file, labelpc);
17652 fputs (":\n", file);
17653 fputs ("\tadd\tr12, pc, r12\n", file);
17656 /* TODO: Use movw/movt for large constants when available. */
17657 while (mi_delta != 0)
17659 if ((mi_delta & (3 << shift)) == 0)
17660 shift += 2;
17661 else
17663 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
17664 mi_op, this_regno, this_regno,
17665 mi_delta & (0xff << shift));
17666 mi_delta &= ~(0xff << shift);
17667 shift += 8;
17670 if (TARGET_THUMB1)
17672 fprintf (file, "\tbx\tr12\n");
17673 ASM_OUTPUT_ALIGN (file, 2);
17674 assemble_name (file, label);
17675 fputs (":\n", file);
17676 if (flag_pic)
17678 /* Output ".word .LTHUNKn-7-.LTHUNKPCn". */
17679 rtx tem = XEXP (DECL_RTL (function), 0);
17680 tem = gen_rtx_PLUS (GET_MODE (tem), tem, GEN_INT (-7));
17681 tem = gen_rtx_MINUS (GET_MODE (tem),
17682 tem,
17683 gen_rtx_SYMBOL_REF (Pmode,
17684 ggc_strdup (labelpc)));
17685 assemble_integer (tem, 4, BITS_PER_WORD, 1);
17687 else
17688 /* Output ".word .LTHUNKn". */
17689 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
17691 else
17693 fputs ("\tb\t", file);
17694 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
17695 if (NEED_PLT_RELOC)
17696 fputs ("(PLT)", file);
17697 fputc ('\n', file);
17702 arm_emit_vector_const (FILE *file, rtx x)
17704 int i;
17705 const char * pattern;
17707 gcc_assert (GET_CODE (x) == CONST_VECTOR);
17709 switch (GET_MODE (x))
17711 case V2SImode: pattern = "%08x"; break;
17712 case V4HImode: pattern = "%04x"; break;
17713 case V8QImode: pattern = "%02x"; break;
17714 default: gcc_unreachable ();
17717 fprintf (file, "0x");
17718 for (i = CONST_VECTOR_NUNITS (x); i--;)
17720 rtx element;
17722 element = CONST_VECTOR_ELT (x, i);
17723 fprintf (file, pattern, INTVAL (element));
17726 return 1;
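/* Output the load of an iWMMXt GR (wCGR) register from memory.  Loads
   with an in-range offset use a single wldrw; otherwise the value is
   loaded through a core register (the address base register, saved and
   restored on the stack around the sequence) and transferred with tmcr.
   Sketch of intent; see the code below.  */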
17729 const char *
17730 arm_output_load_gr (rtx *operands)
17732 rtx reg;
17733 rtx offset;
17734 rtx wcgr;
17735 rtx sum;
17737 if (GET_CODE (operands [1]) != MEM
17738 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
17739 || GET_CODE (reg = XEXP (sum, 0)) != REG
17740 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
17741 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
17742 return "wldrw%?\t%0, %1";
17744 /* Fix up an out-of-range load of a GR register. */
17745 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
17746 wcgr = operands[0];
17747 operands[0] = reg;
17748 output_asm_insn ("ldr%?\t%0, %1", operands);
17750 operands[0] = wcgr;
17751 operands[1] = reg;
17752 output_asm_insn ("tmcr%?\t%0, %1", operands);
17753 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
17755 return "";
17758 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
17760 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
17761 named arg and all anonymous args onto the stack.
17762 XXX I know the prologue shouldn't be pushing registers, but it is faster
17763 that way. */
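/* For example, a variadic function whose named arguments use only r0
   leaves cum->nregs == 1, so *pretend_size becomes 3 * UNITS_PER_WORD and
   r1-r3 are spilled by the prologue.  Illustrative, assuming the usual
   four argument registers r0-r3.  */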
17765 static void
17766 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
17767 enum machine_mode mode ATTRIBUTE_UNUSED,
17768 tree type ATTRIBUTE_UNUSED,
17769 int *pretend_size,
17770 int second_time ATTRIBUTE_UNUSED)
17772 cfun->machine->uses_anonymous_args = 1;
17773 if (cum->nregs < NUM_ARG_REGS)
17774 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
17777 /* Return nonzero if the CONSUMER instruction (a store) does not need
17778 PRODUCER's value to calculate the address. */
17781 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
17783 rtx value = PATTERN (producer);
17784 rtx addr = PATTERN (consumer);
17786 if (GET_CODE (value) == COND_EXEC)
17787 value = COND_EXEC_CODE (value);
17788 if (GET_CODE (value) == PARALLEL)
17789 value = XVECEXP (value, 0, 0);
17790 value = XEXP (value, 0);
17791 if (GET_CODE (addr) == COND_EXEC)
17792 addr = COND_EXEC_CODE (addr);
17793 if (GET_CODE (addr) == PARALLEL)
17794 addr = XVECEXP (addr, 0, 0);
17795 addr = XEXP (addr, 0);
17797 return !reg_overlap_mentioned_p (value, addr);
17800 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
17801 have an early register shift value or amount dependency on the
17802 result of PRODUCER. */
17805 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
17807 rtx value = PATTERN (producer);
17808 rtx op = PATTERN (consumer);
17809 rtx early_op;
17811 if (GET_CODE (value) == COND_EXEC)
17812 value = COND_EXEC_CODE (value);
17813 if (GET_CODE (value) == PARALLEL)
17814 value = XVECEXP (value, 0, 0);
17815 value = XEXP (value, 0);
17816 if (GET_CODE (op) == COND_EXEC)
17817 op = COND_EXEC_CODE (op);
17818 if (GET_CODE (op) == PARALLEL)
17819 op = XVECEXP (op, 0, 0);
17820 op = XEXP (op, 1);
17822 early_op = XEXP (op, 0);
17823 /* This is either an actual independent shift, or a shift applied to
17824 the first operand of another operation. We want the whole shift
17825 operation. */
17826 if (GET_CODE (early_op) == REG)
17827 early_op = op;
17829 return !reg_overlap_mentioned_p (value, early_op);
17832 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
17833 have an early register shift value dependency on the result of
17834 PRODUCER. */
17837 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
17839 rtx value = PATTERN (producer);
17840 rtx op = PATTERN (consumer);
17841 rtx early_op;
17843 if (GET_CODE (value) == COND_EXEC)
17844 value = COND_EXEC_CODE (value);
17845 if (GET_CODE (value) == PARALLEL)
17846 value = XVECEXP (value, 0, 0);
17847 value = XEXP (value, 0);
17848 if (GET_CODE (op) == COND_EXEC)
17849 op = COND_EXEC_CODE (op);
17850 if (GET_CODE (op) == PARALLEL)
17851 op = XVECEXP (op, 0, 0);
17852 op = XEXP (op, 1);
17854 early_op = XEXP (op, 0);
17856 /* This is either an actual independent shift, or a shift applied to
17857 the first operand of another operation. We want the value being
17858 shifted, in either case. */
17859 if (GET_CODE (early_op) != REG)
17860 early_op = XEXP (early_op, 0);
17862 return !reg_overlap_mentioned_p (value, early_op);
17865 /* Return nonzero if the CONSUMER (a mul or mac op) does not
17866 have an early register mult dependency on the result of
17867 PRODUCER. */
17870 arm_no_early_mul_dep (rtx producer, rtx consumer)
17872 rtx value = PATTERN (producer);
17873 rtx op = PATTERN (consumer);
17875 if (GET_CODE (value) == COND_EXEC)
17876 value = COND_EXEC_CODE (value);
17877 if (GET_CODE (value) == PARALLEL)
17878 value = XVECEXP (value, 0, 0);
17879 value = XEXP (value, 0);
17880 if (GET_CODE (op) == COND_EXEC)
17881 op = COND_EXEC_CODE (op);
17882 if (GET_CODE (op) == PARALLEL)
17883 op = XVECEXP (op, 0, 0);
17884 op = XEXP (op, 1);
17886 return (GET_CODE (op) == PLUS
17887 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
17890 /* We can't rely on the caller doing the proper promotion when
17891 using APCS or ATPCS. */
17893 static bool
17894 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
17896 return !TARGET_AAPCS_BASED;
17900 /* AAPCS based ABIs use short enums by default. */
17902 static bool
17903 arm_default_short_enums (void)
17905 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
17909 /* AAPCS requires that anonymous bitfields affect structure alignment. */
17911 static bool
17912 arm_align_anon_bitfield (void)
17914 return TARGET_AAPCS_BASED;
17918 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
17920 static tree
17921 arm_cxx_guard_type (void)
17923 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
17926 /* Return non-zero if the consumer (a multiply-accumulate instruction)
17927 has an accumulator dependency on the result of the producer (a
17928 multiplication instruction) and no other dependency on that result. */
17930 arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
17932 rtx mul = PATTERN (producer);
17933 rtx mac = PATTERN (consumer);
17934 rtx mul_result;
17935 rtx mac_op0, mac_op1, mac_acc;
17937 if (GET_CODE (mul) == COND_EXEC)
17938 mul = COND_EXEC_CODE (mul);
17939 if (GET_CODE (mac) == COND_EXEC)
17940 mac = COND_EXEC_CODE (mac);
17942 /* Check that mul is of the form (set (...) (mult ...))
17943 and mla is of the form (set (...) (plus (mult ...) (...))). */
17944 if ((GET_CODE (mul) != SET || GET_CODE (XEXP (mul, 1)) != MULT)
17945 || (GET_CODE (mac) != SET || GET_CODE (XEXP (mac, 1)) != PLUS
17946 || GET_CODE (XEXP (XEXP (mac, 1), 0)) != MULT))
17947 return 0;
17949 mul_result = XEXP (mul, 0);
17950 mac_op0 = XEXP (XEXP (XEXP (mac, 1), 0), 0);
17951 mac_op1 = XEXP (XEXP (XEXP (mac, 1), 0), 1);
17952 mac_acc = XEXP (XEXP (mac, 1), 1);
17954 return (reg_overlap_mentioned_p (mul_result, mac_acc)
17955 && !reg_overlap_mentioned_p (mul_result, mac_op0)
17956 && !reg_overlap_mentioned_p (mul_result, mac_op1));
17960 /* The EABI says test the least significant bit of a guard variable. */
17962 static bool
17963 arm_cxx_guard_mask_bit (void)
17965 return TARGET_AAPCS_BASED;
17969 /* The EABI specifies that all array cookies are 8 bytes long. */
17971 static tree
17972 arm_get_cookie_size (tree type)
17974 tree size;
17976 if (!TARGET_AAPCS_BASED)
17977 return default_cxx_get_cookie_size (type);
17979 size = build_int_cst (sizetype, 8);
17980 return size;
17984 /* The EABI says that array cookies should also contain the element size. */
17986 static bool
17987 arm_cookie_has_size (void)
17989 return TARGET_AAPCS_BASED;
17993 /* The EABI says constructors and destructors should return a pointer to
17994 the object constructed/destroyed. */
17996 static bool
17997 arm_cxx_cdtor_returns_this (void)
17999 return TARGET_AAPCS_BASED;
18002 /* The EABI says that an inline function may never be the key
18003 method. */
18005 static bool
18006 arm_cxx_key_method_may_be_inline (void)
18008 return !TARGET_AAPCS_BASED;
18011 static void
18012 arm_cxx_determine_class_data_visibility (tree decl)
18014 if (!TARGET_AAPCS_BASED)
18015 return;
18017 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
18018 is exported. However, on systems without dynamic vague linkage,
18019 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
18020 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
18021 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
18022 else
18023 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
18024 DECL_VISIBILITY_SPECIFIED (decl) = 1;
18027 static bool
18028 arm_cxx_class_data_always_comdat (void)
18030 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
18031 vague linkage if the class has no key function. */
18032 return !TARGET_AAPCS_BASED;
18036 /* The EABI says __aeabi_atexit should be used to register static
18037 destructors. */
18039 static bool
18040 arm_cxx_use_aeabi_atexit (void)
18042 return TARGET_AAPCS_BASED;
18046 void
18047 arm_set_return_address (rtx source, rtx scratch)
18049 arm_stack_offsets *offsets;
18050 HOST_WIDE_INT delta;
18051 rtx addr;
18052 unsigned long saved_regs;
18054 saved_regs = arm_compute_save_reg_mask ();
18056 if ((saved_regs & (1 << LR_REGNUM)) == 0)
18057 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18058 else
18060 if (frame_pointer_needed)
18061 addr = plus_constant(hard_frame_pointer_rtx, -4);
18062 else
18064 /* LR will be the first saved register. */
18065 offsets = arm_get_frame_offsets ();
18066 delta = offsets->outgoing_args - (offsets->frame + 4);
18069 if (delta >= 4096)
18071 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
18072 GEN_INT (delta & ~4095)));
18073 addr = scratch;
18074 delta &= 4095;
18076 else
18077 addr = stack_pointer_rtx;
18079 addr = plus_constant (addr, delta);
18081 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18086 void
18087 thumb_set_return_address (rtx source, rtx scratch)
18089 arm_stack_offsets *offsets;
18090 HOST_WIDE_INT delta;
18091 HOST_WIDE_INT limit;
18092 int reg;
18093 rtx addr;
18094 unsigned long mask;
18096 emit_insn (gen_rtx_USE (VOIDmode, source));
18098 mask = thumb1_compute_save_reg_mask ();
18099 if (mask & (1 << LR_REGNUM))
18101 offsets = arm_get_frame_offsets ();
18103 limit = 1024;
18104 /* Find the saved regs. */
18105 if (frame_pointer_needed)
18107 delta = offsets->soft_frame - offsets->saved_args;
18108 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
18109 if (TARGET_THUMB1)
18110 limit = 128;
18112 else
18114 delta = offsets->outgoing_args - offsets->saved_args;
18115 reg = SP_REGNUM;
18117 /* Allow for the stack frame. */
18118 if (TARGET_THUMB1 && TARGET_BACKTRACE)
18119 delta -= 16;
18120 /* The link register is always the first saved register. */
18121 delta -= 4;
18123 /* Construct the address. */
18124 addr = gen_rtx_REG (SImode, reg);
18125 if (delta > limit)
18127 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
18128 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
18129 addr = scratch;
18131 else
18132 addr = plus_constant (addr, delta);
18134 emit_move_insn (gen_frame_mem (Pmode, addr), source);
18136 else
18137 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
18140 /* Implements target hook vector_mode_supported_p. */
18141 bool
18142 arm_vector_mode_supported_p (enum machine_mode mode)
18144 /* Neon also supports V2SImode, etc. listed in the clause below. */
18145 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
18146 || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
18147 return true;
18149 if ((mode == V2SImode)
18150 || (mode == V4HImode)
18151 || (mode == V8QImode))
18152 return true;
18154 return false;
18157 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
18158 ARM insns and therefore guarantee that the shift count is modulo 256.
18159 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
18160 guarantee no particular behavior for out-of-range counts. */
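/* For instance, an SImode variable shift by 260 may be truncated to
   260 & 255 == 4, while the zero mask for other modes promises nothing
   about out-of-range DImode counts.  Illustrative only.  */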
18162 static unsigned HOST_WIDE_INT
18163 arm_shift_truncation_mask (enum machine_mode mode)
18165 return mode == SImode ? 255 : 0;
18169 /* Map internal gcc register numbers to DWARF2 register numbers. */
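/* For example, core registers keep their own numbers, the first VFP
   register maps to 64 and the first iWMMXt register to 112 (illustrative
   of the mapping below).  */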
18171 unsigned int
18172 arm_dbx_register_number (unsigned int regno)
18174 if (regno < 16)
18175 return regno;
18177 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
18178 compatibility. The EABI defines them as registers 96-103. */
18179 if (IS_FPA_REGNUM (regno))
18180 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
18182 /* FIXME: VFPv3 register numbering. */
18183 if (IS_VFP_REGNUM (regno))
18184 return 64 + regno - FIRST_VFP_REGNUM;
18186 if (IS_IWMMXT_GR_REGNUM (regno))
18187 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
18189 if (IS_IWMMXT_REGNUM (regno))
18190 return 112 + regno - FIRST_IWMMXT_REGNUM;
18192 gcc_unreachable ();
18196 #ifdef TARGET_UNWIND_INFO
18197 /* Emit unwind directives for a store-multiple instruction or stack pointer
18198 push during alignment.
18199 These should only ever be generated by the function prologue code, so
18200 expect them to have a particular form. */
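/* For example, a prologue "push {r4, r5, lr}" store-multiple is annotated
   with ".save {r4, r5, lr}", and a VFP store-multiple is annotated with
   ".vsave" using double-precision register names.  Illustrative only.  */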
18202 static void
18203 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
18205 int i;
18206 HOST_WIDE_INT offset;
18207 HOST_WIDE_INT nregs;
18208 int reg_size;
18209 unsigned reg;
18210 unsigned lastreg;
18211 rtx e;
18213 e = XVECEXP (p, 0, 0);
18214 if (GET_CODE (e) != SET)
18215 abort ();
18217 /* First insn will adjust the stack pointer. */
18218 if (GET_CODE (e) != SET
18219 || GET_CODE (XEXP (e, 0)) != REG
18220 || REGNO (XEXP (e, 0)) != SP_REGNUM
18221 || GET_CODE (XEXP (e, 1)) != PLUS)
18222 abort ();
18224 offset = -INTVAL (XEXP (XEXP (e, 1), 1));
18225 nregs = XVECLEN (p, 0) - 1;
18227 reg = REGNO (XEXP (XVECEXP (p, 0, 1), 1));
18228 if (reg < 16)
18230 /* The function prologue may also push pc, which is not annotated since it is
18231 never restored. We turn this into a stack pointer adjustment. */
18232 if (nregs * 4 == offset - 4)
18234 fprintf (asm_out_file, "\t.pad #4\n");
18235 offset -= 4;
18237 reg_size = 4;
18238 fprintf (asm_out_file, "\t.save {");
18240 else if (IS_VFP_REGNUM (reg))
18242 reg_size = 8;
18243 fprintf (asm_out_file, "\t.vsave {");
18245 else if (reg >= FIRST_FPA_REGNUM && reg <= LAST_FPA_REGNUM)
18247 /* FPA registers are done differently. */
18248 asm_fprintf (asm_out_file, "\t.save %r, %wd\n", reg, nregs);
18249 return;
18251 else
18252 /* Unknown register type. */
18253 abort ();
18255 /* If the stack increment doesn't match the size of the saved registers,
18256 something has gone horribly wrong. */
18257 if (offset != nregs * reg_size)
18258 abort ();
18260 offset = 0;
18261 lastreg = 0;
18262 /* The remaining insns will describe the stores. */
18263 for (i = 1; i <= nregs; i++)
18265 /* Expect (set (mem <addr>) (reg)).
18266 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
18267 e = XVECEXP (p, 0, i);
18268 if (GET_CODE (e) != SET
18269 || GET_CODE (XEXP (e, 0)) != MEM
18270 || GET_CODE (XEXP (e, 1)) != REG)
18271 abort ();
18273 reg = REGNO (XEXP (e, 1));
18274 if (reg < lastreg)
18275 abort ();
18277 if (i != 1)
18278 fprintf (asm_out_file, ", ");
18279 /* We can't use %r for vfp because we need to use the
18280 double precision register names. */
18281 if (IS_VFP_REGNUM (reg))
18282 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
18283 else
18284 asm_fprintf (asm_out_file, "%r", reg);
18286 #ifdef ENABLE_CHECKING
18287 /* Check that the addresses are consecutive. */
18288 e = XEXP (XEXP (e, 0), 0);
18289 if (GET_CODE (e) == PLUS)
18291 offset += reg_size;
18292 if (GET_CODE (XEXP (e, 0)) != REG
18293 || REGNO (XEXP (e, 0)) != SP_REGNUM
18294 || GET_CODE (XEXP (e, 1)) != CONST_INT
18295 || offset != INTVAL (XEXP (e, 1)))
18296 abort ();
18298 else if (i != 1
18299 || GET_CODE (e) != REG
18300 || REGNO (e) != SP_REGNUM)
18301 abort ();
18302 #endif
18304 fprintf (asm_out_file, "}\n");
18307 /* Emit unwind directives for a SET. */
18309 static void
18310 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
18312 rtx e0;
18313 rtx e1;
18314 unsigned reg;
18316 e0 = XEXP (p, 0);
18317 e1 = XEXP (p, 1);
18318 switch (GET_CODE (e0))
18320 case MEM:
18321 /* Pushing a single register. */
18322 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
18323 || GET_CODE (XEXP (XEXP (e0, 0), 0)) != REG
18324 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
18325 abort ();
18327 asm_fprintf (asm_out_file, "\t.save ");
18328 if (IS_VFP_REGNUM (REGNO (e1)))
18329 asm_fprintf(asm_out_file, "{d%d}\n",
18330 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
18331 else
18332 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
18333 break;
18335 case REG:
18336 if (REGNO (e0) == SP_REGNUM)
18338 /* A stack increment. */
18339 if (GET_CODE (e1) != PLUS
18340 || GET_CODE (XEXP (e1, 0)) != REG
18341 || REGNO (XEXP (e1, 0)) != SP_REGNUM
18342 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18343 abort ();
18345 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
18346 -INTVAL (XEXP (e1, 1)));
18348 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
18350 HOST_WIDE_INT offset;
18352 if (GET_CODE (e1) == PLUS)
18354 if (GET_CODE (XEXP (e1, 0)) != REG
18355 || GET_CODE (XEXP (e1, 1)) != CONST_INT)
18356 abort ();
18357 reg = REGNO (XEXP (e1, 0));
18358 offset = INTVAL (XEXP (e1, 1));
18359 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
18360 HARD_FRAME_POINTER_REGNUM, reg,
18361 INTVAL (XEXP (e1, 1)));
18363 else if (GET_CODE (e1) == REG)
18365 reg = REGNO (e1);
18366 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
18367 HARD_FRAME_POINTER_REGNUM, reg);
18369 else
18370 abort ();
18372 else if (GET_CODE (e1) == REG && REGNO (e1) == SP_REGNUM)
18374 /* Move from sp to reg. */
18375 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
18377 else if (GET_CODE (e1) == PLUS
18378 && GET_CODE (XEXP (e1, 0)) == REG
18379 && REGNO (XEXP (e1, 0)) == SP_REGNUM
18380 && GET_CODE (XEXP (e1, 1)) == CONST_INT)
18382 /* Set reg to offset from sp. */
18383 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
18384 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
18386 else if (GET_CODE (e1) == UNSPEC && XINT (e1, 1) == UNSPEC_STACK_ALIGN)
18388 /* Stack pointer save before alignment. */
18389 reg = REGNO (e0);
18390 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
18391 reg + 0x90, reg);
18393 else
18394 abort ();
18395 break;
18397 default:
18398 abort ();
18403 /* Emit unwind directives for the given insn. */
18405 static void
18406 arm_unwind_emit (FILE * asm_out_file, rtx insn)
18408 rtx pat;
18410 if (!ARM_EABI_UNWIND_TABLES)
18411 return;
18413 if (GET_CODE (insn) == NOTE || !RTX_FRAME_RELATED_P (insn))
18414 return;
18416 pat = find_reg_note (insn, REG_FRAME_RELATED_EXPR, NULL_RTX);
18417 if (pat)
18418 pat = XEXP (pat, 0);
18419 else
18420 pat = PATTERN (insn);
18422 switch (GET_CODE (pat))
18424 case SET:
18425 arm_unwind_emit_set (asm_out_file, pat);
18426 break;
18428 case SEQUENCE:
18429 /* Store multiple. */
18430 arm_unwind_emit_sequence (asm_out_file, pat);
18431 break;
18433 default:
18434 abort();
18439 /* Output a reference from a function exception table to the type_info
18440 object X. The EABI specifies that the symbol should be relocated by
18441 an R_ARM_TARGET2 relocation. */
18443 static bool
18444 arm_output_ttype (rtx x)
18446 fputs ("\t.word\t", asm_out_file);
18447 output_addr_const (asm_out_file, x);
18448 /* Use special relocations for symbol references. */
18449 if (GET_CODE (x) != CONST_INT)
18450 fputs ("(TARGET2)", asm_out_file);
18451 fputc ('\n', asm_out_file);
18453 return TRUE;
18455 #endif /* TARGET_UNWIND_INFO */
18458 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
18459 stack alignment. */
18461 static void
18462 arm_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
18464 rtx unspec = SET_SRC (pattern);
18465 gcc_assert (GET_CODE (unspec) == UNSPEC);
18467 switch (index)
18469 case UNSPEC_STACK_ALIGN:
18470 /* ??? We should set the CFA = (SP & ~7). At this point we haven't
18471 put anything on the stack, so hopefully it won't matter.
18472 CFA = SP will be correct after alignment. */
18473 dwarf2out_reg_save_reg (label, stack_pointer_rtx,
18474 SET_DEST (pattern));
18475 break;
18476 default:
18477 gcc_unreachable ();
18482 /* Output unwind directives for the start/end of a function. */
18484 void
18485 arm_output_fn_unwind (FILE * f, bool prologue)
18487 if (!ARM_EABI_UNWIND_TABLES)
18488 return;
18490 if (prologue)
18491 fputs ("\t.fnstart\n", f);
18492 else
18493 fputs ("\t.fnend\n", f);
18496 static bool
18497 arm_emit_tls_decoration (FILE *fp, rtx x)
18499 enum tls_reloc reloc;
18500 rtx val;
18502 val = XVECEXP (x, 0, 0);
18503 reloc = INTVAL (XVECEXP (x, 0, 1));
18505 output_addr_const (fp, val);
18507 switch (reloc)
18509 case TLS_GD32:
18510 fputs ("(tlsgd)", fp);
18511 break;
18512 case TLS_LDM32:
18513 fputs ("(tlsldm)", fp);
18514 break;
18515 case TLS_LDO32:
18516 fputs ("(tlsldo)", fp);
18517 break;
18518 case TLS_IE32:
18519 fputs ("(gottpoff)", fp);
18520 break;
18521 case TLS_LE32:
18522 fputs ("(tpoff)", fp);
18523 break;
18524 default:
18525 gcc_unreachable ();
18528 switch (reloc)
18530 case TLS_GD32:
18531 case TLS_LDM32:
18532 case TLS_IE32:
18533 fputs (" + (. - ", fp);
18534 output_addr_const (fp, XVECEXP (x, 0, 2));
18535 fputs (" - ", fp);
18536 output_addr_const (fp, XVECEXP (x, 0, 3));
18537 fputc (')', fp);
18538 break;
18539 default:
18540 break;
18543 return TRUE;
18546 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
18548 static void
18549 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
18551 gcc_assert (size == 4);
18552 fputs ("\t.word\t", file);
18553 output_addr_const (file, x);
18554 fputs ("(tlsldo)", file);
18557 bool
18558 arm_output_addr_const_extra (FILE *fp, rtx x)
18560 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
18561 return arm_emit_tls_decoration (fp, x);
18562 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
18564 char label[256];
18565 int labelno = INTVAL (XVECEXP (x, 0, 0));
18567 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
18568 assemble_name_raw (fp, label);
18570 return TRUE;
18572 else if (GET_CODE (x) == CONST_VECTOR)
18573 return arm_emit_vector_const (fp, x);
18575 return FALSE;
18578 /* Output assembly for a shift instruction.
18579 SET_FLAGS determines how the instruction modifies the condition codes.
18580 0 - Do not set condition codes.
18581 1 - Set condition codes.
18582 2 - Use smallest instruction. */
18583 const char *
18584 arm_output_shift(rtx * operands, int set_flags)
18586 char pattern[100];
18587 static const char flag_chars[3] = {'?', '.', '!'};
18588 const char *shift;
18589 HOST_WIDE_INT val;
18590 char c;
18592 c = flag_chars[set_flags];
18593 if (TARGET_UNIFIED_ASM)
18595 shift = shift_op(operands[3], &val);
18596 if (shift)
18598 if (val != -1)
18599 operands[2] = GEN_INT(val);
18600 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
18602 else
18603 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
18605 else
18606 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
18607 output_asm_insn (pattern, operands);
18608 return "";
18611 /* Output a Thumb-2 casesi instruction. */
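/* An illustrative expansion for a byte-offset dispatch table:
       cmp   rIDX, rMAX
       bhi   .Ldefault
       tbb   [pc, rIDX]
   where rIDX, rMAX and .Ldefault stand for the index, bound and
   default-label operands (hypothetical names).  */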
18612 const char *
18613 thumb2_output_casesi (rtx *operands)
18615 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
18617 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
18619 output_asm_insn ("cmp\t%0, %1", operands);
18620 output_asm_insn ("bhi\t%l3", operands);
18621 switch (GET_MODE(diff_vec))
18623 case QImode:
18624 return "tbb\t[%|pc, %0]";
18625 case HImode:
18626 return "tbh\t[%|pc, %0, lsl #1]";
18627 case SImode:
18628 if (flag_pic)
18630 output_asm_insn ("adr\t%4, %l2", operands);
18631 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
18632 output_asm_insn ("add\t%4, %4, %5", operands);
18633 return "bx\t%4";
18635 else
18637 output_asm_insn ("adr\t%4, %l2", operands);
18638 return "ldr\t%|pc, [%4, %0, lsl #2]";
18640 default:
18641 gcc_unreachable ();
18645 /* A table and a function to perform ARM-specific name mangling for
18646 NEON vector types in order to conform to the AAPCS (see "Procedure
18647 Call Standard for the ARM Architecture", Appendix A). To qualify
18648 for emission with the mangled names defined in that document, a
18649 vector type must not only be of the correct mode but also be
18650 composed of NEON vector element types (e.g. __builtin_neon_qi). */
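/* For example, a V8QImode vector whose element type is __builtin_neon_qi
   is mangled as "15__simd64_int8_t", per the table below.  */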
18651 typedef struct
18653 enum machine_mode mode;
18654 const char *element_type_name;
18655 const char *aapcs_name;
18656 } arm_mangle_map_entry;
18658 static arm_mangle_map_entry arm_mangle_map[] = {
18659 /* 64-bit containerized types. */
18660 { V8QImode, "__builtin_neon_qi", "15__simd64_int8_t" },
18661 { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
18662 { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
18663 { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
18664 { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
18665 { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
18666 { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
18667 { V8QImode, "__builtin_neon_poly8", "16__simd64_poly8_t" },
18668 { V4HImode, "__builtin_neon_poly16", "17__simd64_poly16_t" },
18669 /* 128-bit containerized types. */
18670 { V16QImode, "__builtin_neon_qi", "16__simd128_int8_t" },
18671 { V16QImode, "__builtin_neon_uqi", "17__simd128_uint8_t" },
18672 { V8HImode, "__builtin_neon_hi", "17__simd128_int16_t" },
18673 { V8HImode, "__builtin_neon_uhi", "18__simd128_uint16_t" },
18674 { V4SImode, "__builtin_neon_si", "17__simd128_int32_t" },
18675 { V4SImode, "__builtin_neon_usi", "18__simd128_uint32_t" },
18676 { V4SFmode, "__builtin_neon_sf", "19__simd128_float32_t" },
18677 { V16QImode, "__builtin_neon_poly8", "17__simd128_poly8_t" },
18678 { V8HImode, "__builtin_neon_poly16", "18__simd128_poly16_t" },
18679 { VOIDmode, NULL, NULL }
18682 const char *
18683 arm_mangle_type (const_tree type)
18685 arm_mangle_map_entry *pos = arm_mangle_map;
18687 if (TREE_CODE (type) != VECTOR_TYPE)
18688 return NULL;
18690 /* Check the mode of the vector type, and the name of the vector
18691 element type, against the table. */
18692 while (pos->mode != VOIDmode)
18694 tree elt_type = TREE_TYPE (type);
18696 if (pos->mode == TYPE_MODE (type)
18697 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
18698 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
18699 pos->element_type_name))
18700 return pos->aapcs_name;
18702 pos++;
18705 /* Use the default mangling for unrecognized (possibly user-defined)
18706 vector types. */
18707 return NULL;
18710 #include "gt-arm.h"